Merge pull request #464 from ClusterLabs/auto_grow_pv

This branch resolves issue #462; Auto growing PVs. Specifically, it l…
main
Digimer 1 year ago committed by GitHub
commit b04f12141d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      Anvil/Tools.pm
  2. 288
      Anvil/Tools/Storage.pm
  3. 2
      anvil.spec.in
  4. 8
      man/anvil-manage-host.8
  5. 44
      share/words.xml
  6. 3
      tools/anvil-configure-host
  7. 49
      tools/anvil-manage-host
  8. 4
      tools/anvil-manage-server-storage

@ -1256,6 +1256,7 @@ sub _set_paths
'osinfo-query' => "/usr/bin/osinfo-query",
pamscale => "/usr/bin/pamscale",
pamtopng => "/usr/bin/pamtopng",
parted => "/usr/sbin/parted",
passwd => "/usr/bin/passwd",
pcs => "/usr/sbin/anvil-pcs-wrapper",
perccli64 => "/opt/MegaRAID/perccli/perccli64",
@ -1271,6 +1272,8 @@ sub _set_paths
postmap => "/usr/sbin/postmap",
postqueue => "/usr/sbin/postqueue",
pwd => "/usr/bin/pwd",
pvdisplay => "/usr/sbin/pvdisplay",
pvresize => "/usr/sbin/pvresize",
pvs => "/usr/sbin/pvs",
pvscan => "/usr/sbin/pvscan",
rm => "/usr/bin/rm",
@ -1278,6 +1281,7 @@ sub _set_paths
rsync => "/usr/bin/rsync",
sed => "/usr/bin/sed",
setsid => "/usr/bin/setsid", # See: https://serverfault.com/questions/1105733/virsh-command-hangs-when-script-runs-in-the-background
sfdisk => "/usr/sbin/sfdisk",
'shutdown' => "/usr/sbin/shutdown",
snmpget => "/usr/bin/snmpget",
snmpset => "/usr/bin/snmpset",

@ -16,6 +16,7 @@ our $VERSION = "3.0.0";
my $THIS_FILE = "Storage.pm";
### Methods;
# auto_grow_pv
# backup
# change_mode
# change_owner
@ -113,6 +114,291 @@ sub parent
#############################################################################################################
=head2 auto_grow_pv

This looks at LVM PVs on the local host. For each one that is found, C<< parted >> is called to check if there's more than 1 GiB of free space available directly after it. If so, it will extend the PV partition to use the free space, and then resize the PV itself to use the grown partition.

Before a partition is resized, the partition table is backed up using C<< sfdisk >>. If the partition resize fails, the backup is reloaded and the program exits with the exit code C<< 1 >>.

If the list of PVs can't be read, C<< 1 >> is returned. Otherwise, C<< 0 >> is returned once all PVs have been checked.

This method takes no parameters.

=cut
sub auto_grow_pv
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Storage->auto_grow_pv()" }});
	
	# The free space after a PV must be at least this big (1 GiB) before we'll grow into it.
	my $minimum_free_space = 1073741824;
	
	# Look for disks that have unpartitioned space and grow the PV on them if needed.
	my $host_uuid       = $anvil->Get->host_uuid();
	my $short_host_name = $anvil->Get->short_host_name();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:host_uuid'       => $host_uuid,
		's2:short_host_name' => $short_host_name,
	}});
	
	# Ask LVM for the PVs, with sizes in bytes, as comma-separated values.
	my $shell_call = $anvil->data->{path}{exe}{pvs}." --noheadings --units b -o pv_name,vg_name,pv_size,pv_free --separator ,";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	if ($return_code)
	{
		# Bad return code. We're not in a loop here, so we return instead of calling 'next'.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0159", variables => { 
			shell_call  => $shell_call, 
			return_code => $return_code, 
			output      => $output, 
		}});
		return(1);
	}
	
	PV: foreach my $line (split/\n/, $output)
	{
		$line = $anvil->Words->clean_spaces({string => $line});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
		
		my ($pv_name, $used_by_vg, $pv_size, $pv_free) = (split/,/, $line);
		
		# Skip blank lines and anything else that isn't a complete PV record.
		next PV if ((not defined $pv_size) or (not defined $pv_free));
		$pv_size =~ s/B$//;
		$pv_free =~ s/B$//;
		next PV if (($pv_size !~ /^\d+$/) or ($pv_free !~ /^\d+$/));
		
		my $pv_used = $pv_size - $pv_free;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			pv_name    => $pv_name,
			used_by_vg => $used_by_vg, 
			pv_size    => $pv_size." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $pv_size}).")", 
			pv_free    => $pv_free." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $pv_free}).")", 
			pv_used    => $pv_used." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $pv_used}).")", 
		}});
		
		# Get the raw backing disk and the partition number.
		my $device_path  = "";
		my $pv_partition = 0;
		if ($pv_name =~ /(\/dev\/nvme\d+n\d+)p(\d+)$/)
		{
			# NVMe style; '/dev/nvme0n1p2' -> '/dev/nvme0n1' + '2'
			$device_path  = $1;
			$pv_partition = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				device_path  => $device_path,
				pv_partition => $pv_partition, 
			}});
		}
		elsif ($pv_name =~ /(\/dev\/\w+?)(\d+)$/)
		{
			# The device name match is non-greedy so that all trailing digits are treated as the 
			# partition number; '/dev/sda10' -> '/dev/sda' + '10', not '/dev/sda1' + '0'.
			$device_path  = $1;
			$pv_partition = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				device_path  => $device_path,
				pv_partition => $pv_partition, 
			}});
		}
		else
		{
			# No device found for the PV.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0821", variables => { pv_name => $pv_name }});
			next PV;
		}
		
		# See how much free space there is on the backing disk.
		my $parted_call = $anvil->data->{path}{exe}{parted}." --align optimal ".$device_path." unit B print free";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $parted_call }});
		
		my ($parted_output, $parted_return_code) = $anvil->System->call({debug => 3, shell_call => $parted_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $parted_output,
			return_code => $parted_return_code, 
		}});
		if ($parted_return_code)
		{
			# Bad return code.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0159", variables => { 
				shell_call  => $parted_call, 
				return_code => $parted_return_code, 
				output      => $parted_output, 
			}});
			next PV;
		}
		
		# Walk the partition table until we find the PV's partition. Only the line directly after it
		# matters; it's either free space we can grow into, or something that blocks us. Either way,
		# we stop reading the table after that line so later lines can't be mistaken for it.
		my $pv_found = 0;
		PARTED_LINE: foreach my $parted_line (split/\n/, $parted_output)
		{
			$parted_line = $anvil->Words->clean_spaces({string => $parted_line});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $parted_line }});
			
			if (not $pv_found)
			{
				if ($parted_line =~ /^$pv_partition\s/)
				{
					$pv_found = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pv_found => $pv_found }});
				}
				else
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						device_path  => $device_path,
						pv_partition => $pv_partition, 
						pv_found     => $pv_found, 
					}});
				}
				next PARTED_LINE;
			}
			
			# This is the line directly after the PV's partition. If it's empty, the table ended
			# and there is nothing after the PV at all.
			last PARTED_LINE if $parted_line eq "";
			
			my ($start_byte, $end_byte, $size) = ($parted_line =~ /^(\d+)B\s+(\d+)B\s+(\d+)B\s+Free Space/i);
			if (not defined $size)
			{
				# There's another partition after this PV, do nothing. The partition number is
				# passed under both names so that the message renders whichever variable name
				# the 'log_0824' string uses.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0824", variables => { 
					device_path      => $device_path,
					pv_partition     => $pv_partition,
					device_partition => $pv_partition,
				}});
				last PARTED_LINE;
			}
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:start_byte' => $start_byte." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $start_byte}).")", 
				's2:end_byte'   => $end_byte." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $end_byte}).")", 
				's3:size'       => $size." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $size}).")", 
			}});
			
			# There's free space! If it's at least 1 GiB, grow into it automatically.
			if ($size < $minimum_free_space)
			{
				# Not enough free space
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0823", variables => { 
					free_space   => $anvil->Convert->bytes_to_human_readable({'bytes' => $size}),
					device_path  => $device_path,
					pv_partition => $pv_partition,
				}});
				last PARTED_LINE;
			}
			
			# Enough free space, grow!
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0822", variables => { 
				free_space   => $anvil->Convert->bytes_to_human_readable({'bytes' => $size}),
				device_path  => $device_path,
				pv_partition => $pv_partition,
			}});
			
			### Backup the partition table.
			# sfdisk --dump /dev/sda > partition_table_backup_sda
			my $device_name        = ($device_path =~ /^\/dev\/(.*)$/)[0];
			my $partition_backup   = "/tmp/".$device_name.".partition_table_backup";
			my $backup_shell_call  = $anvil->data->{path}{exe}{sfdisk}." --dump ".$device_path." > ".$partition_backup;
			my $restore_shell_call = $anvil->data->{path}{exe}{sfdisk}." ".$device_path." < ".$partition_backup." --force";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				device_name      => $device_name,
				partition_backup => $partition_backup, 
				shell_call       => $backup_shell_call, 
			}});
			
			my ($backup_output, $backup_return_code) = $anvil->System->call({debug => 3, shell_call => $backup_shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $backup_output,
				return_code => $backup_return_code, 
			}});
			if ($backup_return_code)
			{
				# Bad return code. Without a backup, we won't touch this disk.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0159", variables => { 
					shell_call  => $backup_shell_call, 
					return_code => $backup_return_code, 
					output      => $backup_output, 
				}});
				last PARTED_LINE;
			}
			
			# Tell the user about the backup.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0361", variables => { 
				device_path      => $device_path, 
				partition_backup => $partition_backup, 
				restore_command  => $restore_shell_call, 
			}});
			
			### Grow the partition
			# parted --align optimal /dev/sda ---pretend-input-tty resizepart 2 100% Yes; echo $?
			my $grow_shell_call = $anvil->data->{path}{exe}{parted}." --align optimal ".$device_path." ---pretend-input-tty resizepart ".$pv_partition." 100% Yes";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $grow_shell_call }});
			
			my ($grow_output, $grow_return_code) = $anvil->System->call({debug => 3, shell_call => $grow_shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $grow_output,
				return_code => $grow_return_code, 
			}});
			if ($grow_return_code)
			{
				# Bad return code.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0159", variables => { 
					shell_call  => $grow_shell_call, 
					return_code => $grow_return_code, 
					output      => $grow_output, 
				}});
				
				### Restore the partition table
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0467"});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { restore_shell_call => $restore_shell_call }});
				
				my ($restore_output, $restore_return_code) = $anvil->System->call({debug => 3, shell_call => $restore_shell_call});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					output      => $restore_output,
					return_code => $restore_return_code, 
				}});
				
				# Error out.
				$anvil->nice_exit({exit_code => 1});
			}
			else
			{
				# Looks like it worked. Call print again to log the new value.
				my $print_shell_call = $anvil->data->{path}{exe}{parted}." --align optimal ".$device_path." unit B print free";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $print_shell_call }});
				
				my ($print_output, $print_return_code) = $anvil->System->call({debug => 3, shell_call => $print_shell_call});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0825", variables => { 
					pv_name => $pv_name, 
					output  => $print_output, 
				}});
			}
			
			### Resize the PV.
			my $resize_shell_call = $anvil->data->{path}{exe}{pvresize}." ".$pv_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $resize_shell_call }});
			
			my ($resize_output, $resize_return_code) = $anvil->System->call({debug => 3, shell_call => $resize_shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $resize_output,
				return_code => $resize_return_code, 
			}});
			if ($resize_return_code)
			{
				# Bad return code.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0159", variables => { 
					shell_call  => $resize_shell_call, 
					return_code => $resize_return_code, 
					output      => $resize_output, 
				}});
				last PARTED_LINE;
			}
			
			# Looks like it worked. Call pvdisplay to log the new PV details.
			my $display_shell_call = $anvil->data->{path}{exe}{pvdisplay}." ".$pv_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $display_shell_call }});
			
			my ($display_output, $display_return_code) = $anvil->System->call({debug => 3, shell_call => $display_shell_call});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0826", variables => { 
				pv_name => $pv_name, 
				output  => $display_output, 
			}});
			
			# Done.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0827", variables => { pv_name => $pv_name }});
			last PARTED_LINE;
		}
	}
	
	return(0);
}
=head2 backup
This will create a copy of the file under the C<< path::directories::backups >> directory with the datestamp as a suffix. The path is preserved under the backup directory. The path and file name are returned.
@ -5647,7 +5933,7 @@ fi";
#############################################################################################################
=head2
=head2 _create_rsync_wrapper
This does the actual work of creating the C<< expect >> wrapper script and returns the path to that wrapper for C<< rsync >> calls.

@ -67,6 +67,7 @@ Requires: mlocate
Requires: net-snmp-utils
Requires: NetworkManager-initscripts-updown
Requires: nvme-cli
Requires: parted
Requires: pciutils
Requires: perl-Capture-Tiny
Requires: perl-Data-Dumper
@ -110,6 +111,7 @@ Requires: tcpdump
Requires: tmux
Requires: unzip
Requires: usbutils
Requires: util-linux
Requires: vim
Requires: wget
# iptables-services conflicts with firewalld

@ -24,6 +24,11 @@ Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a s
\fB\-\-age\-out\-database\fR
This requests the database check for records that are too old and purge them.
.TP
\fB\-\-auto\-grow\-pv\fR
This looks at LVM physical volumes on the local host. For each one that is found, 'parted' is called to check if there's more than 1 GiB of free space available after it. If so, it will extend the PV partition to use the free space.
.IP
If you deleted the default '/home' partition during the install of a subnode or DR host, this should give you that space back.
.TP
\fB\-\-check\-configured\fR
Check to see if the host is marked as configured or not.
.TP
@ -33,6 +38,9 @@ This checks to see if the database is enabled or not.
\fB\-\-check\-network\-mapping\fR
This reports if the host is currently in network mapping (this disables several features and watches the network states much more frequently)
.TP
\fB\-\-confirm\fR
This confirms actions that would normally prompt the user to confirm before proceeding.
.TP
\fB\-\-database\-active\fR
This enables the database on the local Striker dashboard.
.TP

@ -735,6 +735,7 @@ The XML that failed sanity check was:
]]></key>
<key name="error_0465"><![CDATA[The file: [#!variable!file!#] doesn't exist on the target: [#!variable!target!#].]]></key>
<key name="error_0466"><![CDATA[The Anvil! string (name or UUID): [#!variable!string!#] didn't match any known Anvil! in the database.]]></key>
<key name="error_0467"><![CDATA[[ Error ] - The repartition attempt failed! Reloading the partition table now!]]></key>
<!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which lines are translatable -->
@ -2633,6 +2634,22 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0818">The server: [#!variable!server_name!#] libvirt definition will now be updated.</key>
<key name="log_0819">Check to verify that the connection to the server: [#!variable!server_name!#] is valid.</key>
<key name="log_0820">The network mapping flag is NOT set.</key>
<key name="log_0821">No device found for PV: [#!variable!pv_name!#], skipping it.</key>
<key name="log_0822">Found: [#!variable!free_space!#] free space after the PV partition: [#!variable!device_path!#:#!variable!pv_partition!#]! Will grow the partition to use the free space.</key>
<key name="log_0823">Found: [#!variable!free_space!#] free space after the PV partition: [#!variable!device_path!#:#!variable!pv_partition!#]. This is too small for auto-growing the partition.</key>
<key name="log_0824">Found the PV partition: [#!variable!device_path!#:#!variable!pv_partition!#], but there's another partition after it. Not going to grow it, of course.</key>
<key name="log_0825">The partition: [#!variable!pv_name!#] appears to have been grown successfully. The new partition scheme is:
====
#!variable!output!#
====
</key>
<key name="log_0826">The resize appears to have been successful. The physical volume: [#!variable!pv_name!#] details are now:
====
#!variable!output!#
====
</key>
<key name="log_0827">The physical volume: [#!variable!pv_name!#] has been resized!</key>
<key name="log_0828">The user answered: [#!variable!answer!#]</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -3186,6 +3203,26 @@ Proceed? [y/N]</key>
<key name="message_0355">This host is already NOT configured to map the network.</key>
<key name="message_0356">This host is no longer configured to map the network.</key>
<key name="message_0357">No hosts with IPMI found, done.</key>
<key name="message_0358">Searching for free space to grow PVs into.</key>
<key name="message_0359">- The '--confirm' switch was used, proceeding.</key>
<key name="message_0360"><![CDATA[
[ Warning ] - Auto-growing the LVM physical volumes could, in some cases, leave the system unbootable.
The steps that will be taken are;
- LVM Physical volumes will be found.
- For each found, 'parted' is used to see if there is > 1GiB of free space available.
- If so, and if no other partitions are after it, it will be grown to use the free space.
- The PV itself will then be resized to use the new space
This is generally used just after initializing a new subnode or DR host. If this host has real data
on it, please proceed with caution.
The partition table will be backed up, and if the partition resize fails, the partition table will be
reloaded automatically. If this host has real data, ensure a complete backup is available before
proceeding.
]]></key>
<key name="message_0361">- [ Note ] - The original partition table for: [#!variable!device_path!#] has been saved to: [#!variable!partition_backup!#]
If anything goes wrong, we will attempt to recover automatically. If needed, you can try
recovering with: [#!variable!restore_command!#]</key>
<!-- Translate names (protocols, etc) -->
<key name="name_0001">Normal Password</key> <!-- none in mail-server -->
@ -3933,7 +3970,11 @@ We will wait: [#!variable!waiting!#] seconds and then try again. We'll give up i
<key name="warning_0156">[ Warning ] - The file: [#!variable!file_path!#] needed to provision the server: [#!variable!server_name!#] was found, but it's not ready yet.</key>
<key name="warning_0157">[ Warning ] - Waiting for a bit, and then will check if files are ready.</key>
<key name="warning_0158">[ Warning ] - There is a duplicate storage group named: [#!variable!group_name!#]. Keeping the group with UUID: [#!variable!keep_uuid!#], and deleting the group with the UUID: [#!variable!delete_uuid!#]</key>
<key name="warning_0159">Please specify a storage group to use to add the new drive to.</key>
<key name="warning_0159">[ Warning ] - The system call: [#!variable!shell_call!#] returned the non-zero return code: [#!variable!return_code!#]. The command output, if anything, was:
====
#!variable!output!#
====
</key>
<key name="warning_0160"> Warning!
[ Warning ] - When trying to create the local meta-data on: [#!variable!drbd_resource!#/#!variable!next_drbd_volume!#]
[ Warning ] - using the command: [#!variable!shell_call!#]
@ -3970,6 +4011,7 @@ We will try to proceed anyway.</key>
#!variable!error!#
====
</key>
<key name="warning_0168">Please specify a storage group to use to add the new drive to.</key>
</language>
<!-- 日本語 -->

@ -64,8 +64,9 @@ pickup_job_details($anvil);
overwrite_variables_with_switches($anvil);
# Set maintenance mode
$anvil->System->maintenance_mode({set => 1, debug => 2});
$anvil->System->maintenance_mode({set => 1});
# Reconfigure the network.
reconfigure_network($anvil);
# Record that we've configured this machine.

@ -26,9 +26,11 @@ my $anvil = Anvil::Tools->new();
# Read switches
$anvil->Get->switches({list => [
"age-out-database",
"auto-grow-pv",
"check-configured",
"check-database",
"check-network-mapping",
"confirm",
"database-active",
"database-inactive",
"disable-network-mapping",
@ -68,6 +70,10 @@ elsif ($anvil->data->{switches}{'resync-database'})
{
resync_database($anvil);
}
elsif ($anvil->data->{switches}{'auto-grow-pv'})
{
auto_grow_pv($anvil);
}
else
{
# Show the options.
@ -81,6 +87,49 @@ $anvil->nice_exit({exit_code => 0});
# Functions #
#############################################################################################################
# This asks the user to confirm (unless '--confirm' was used), then puts the host into maintenance mode and 
# calls Storage->auto_grow_pv() to grow any LVM physical volumes that have free space after them. If the user
# declines, or if no answer can be read, the program exits with the exit code '0' without changing anything.
sub auto_grow_pv
{
	my ($anvil) = @_;
	
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "message_0358"});
	if ($anvil->data->{switches}{confirm})
	{
		# Already confirmed.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0359"});
	}
	else
	{
		# Ask to confirm, with a wee bit of fear;
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "message_0360"});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "message_0021"});
		
		# If STDIN is closed (ie: not run interactively), the read returns undef. Treat that as an 
		# empty answer, which aborts below.
		my $answer = <STDIN>;
		   $answer = "" if not defined $answer;
		chomp $answer;
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0828", variables => { answer => $answer }});
		
		my $lower_answer = lc($answer);
		if (($lower_answer eq "y") or ($lower_answer eq "yes"))
		{
			# Proceed.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "message_0175"});
		}
		else
		{
			# Abort.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "message_0022"});
			$anvil->nice_exit({exit_code => 0});
		}
	}
	
	# Hold maintenance mode while the partitions and PVs are being changed.
	print "Enabling maintenance mode.\n";
	$anvil->System->maintenance_mode({set => 1});
	
	$anvil->Storage->auto_grow_pv({debug => 2});
	
	print "Disabling maintenance mode.\n";
	$anvil->System->maintenance_mode({set => 0});
	
	return(0);
}
sub age_out_data
{
my ($anvil) = @_;

@ -417,11 +417,11 @@ sub manage_disk_add
}});
if (not $anvil->data->{switches}{'storage-group'})
{
print $anvil->Words->string({key => 'warning_0159'})."\n";
print $anvil->Words->string({key => 'warning_0168'})."\n";
show_storage_groups($anvil);
$anvil->Job->update_progress({
progress => 100,
message => "warning_0159",
message => "warning_0168",
}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 1});
}

Loading…
Cancel
Save