diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index 8fab1091..f50e413a 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1150,6 +1150,7 @@ sub _set_paths 'anvil-safe-start' => "/usr/sbin/anvil-safe-start", 'anvil-safe-stop' => "/usr/sbin/anvil-safe-stop", 'anvil-shutdown-server' => "/usr/sbin/anvil-shutdown-server", + 'anvil-special-operations' => "/usr/sbin/anvil-special-operations", 'anvil-sync-shared' => "/usr/sbin/anvil-sync-shared", 'anvil-update-files' => "/usr/sbin/anvil-update-files", 'anvil-update-states' => "/usr/sbin/anvil-update-states", diff --git a/Anvil/Tools/Remote.pm b/Anvil/Tools/Remote.pm index 044be424..8bcea088 100644 --- a/Anvil/Tools/Remote.pm +++ b/Anvil/Tools/Remote.pm @@ -224,13 +224,9 @@ B: By default, a connection to a target will be held open and cached to in Parameters; -=head3 background (optional, default '0') - -If set to C<< 1 >>, the command is run in the background. In this case, the PID of the SSH process is returned. The called should use C<< waitpid >> to ensure the PID has been reaped. - =head3 close (optional, default '0') -If set to C<< 1 >>, the connection to the target will be closed at the end of the call. +If set, the connection to the target will be closed at the end of the call. =head3 log_level (optional, default C<< 3 >>) @@ -304,7 +300,6 @@ sub call $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "cache::ssh_fh::${ssh_fh_key}" => $anvil->data->{cache}{ssh_fh}{$ssh_fh_key} }}); # Now pick up the rest of the variables. - my $background = defined $parameter->{background} ? $parameter->{background} : 0; my $close = defined $parameter->{'close'} ? $parameter->{'close'} : 0; my $no_cache = defined $parameter->{no_cache} ? $parameter->{no_cache} : 0; my $password = defined $parameter->{password} ? 
$parameter->{password} : ""; @@ -315,16 +310,15 @@ sub call my $ssh_fh = $anvil->data->{cache}{ssh_fh}{$ssh_fh_key}; # NOTE: The shell call might contain sensitive data, so we show '--' if 'secure' is set and $anvil->Log->secure is not. $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - background => $background, 'close' => $close, password => $anvil->Log->is_secure($password), secure => $secure, - shell_call => (not $secure) ? $shell_call : $anvil->Log->is_secure($shell_call), - ssh_fh => $ssh_fh, + shell_call => (not $secure) ? $shell_call : $anvil->Log->is_secure($shell_call), + ssh_fh => $ssh_fh, start_time => $start_time, timeout => $timeout, port => $port, - target => $target, + target => $target, ssh_fh_key => $ssh_fh_key, }}); @@ -649,14 +643,6 @@ sub call $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { ssh_fh => $ssh_fh }}); if ($ssh_fh =~ /^Net::OpenSSH/) { - # Are we doing a background call? - if ($background) - { - my $pid = $ssh_fh->spawn($shell_call); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { pid => $pid }}); - return($pid); - } - # The shell_call can't end is a newline. Conveniently, we want the return code. By adding # this, we ensure it doesn't end in a new-line (and we can't blindly strip off the last # new-line because of 'EOF' type cat's). 
diff --git a/man/Makefile.am b/man/Makefile.am index 0150315a..cce903dc 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -21,6 +21,7 @@ dist_man8_MANS = \ anvil-manage-server.8 \ anvil-manage-server-storage.8 \ anvil-manage-storage-groups.8 \ + anvil-special-operations.8 \ anvil-watch-drbd.8 \ scancore.8 \ striker-check-machines.8 \ diff --git a/man/anvil-special-operations.8 b/man/anvil-special-operations.8 new file mode 100644 index 00000000..8ef90225 --- /dev/null +++ b/man/anvil-special-operations.8 @@ -0,0 +1,32 @@ +.\" Manpage for anvil-special-operations +.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. +.TH anvil-special-operations "8" "Jun 30 2023" "Anvil! Intelligent Availability™ Platform" +.SH NAME +anvil-special-operations \- This program is generally meant to be used by other programs. +.SH SYNOPSIS +.B anvil-special-operations +\fI\, \/\fR[\fI\,options\/\fR] +.SH DESCRIPTION +This tool is used, generally by other parts of the Anvil!, to accomplish tasks that generally can't be accomplished by direct system calls. It's a general purpose tool meant to solve specific corner cases. +.TP +.SH OPTIONS +.TP +\-?, \-h, \fB\-\-help\fR +Show this man page. +.TP +\fB\-\-log-secure\fR +When logging, record sensitive data, like passwords. +.TP +\-v, \-vv, \-vvv +Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. +.SS "Commands:" +.TP +\fB\-\-task\fR +This is the task being requested. Current options are: +.IP refresh-drbd-resource +This requires \fB\-\-resource <resource>\fR, and will call 'drbdadm adjust <resource>' as a background task and then return immediately. This is required when adding a new volume to an existing resource as 'drbdadm adjust <resource>' will hold until it is called on all active DRBD nodes. This blocks the caller after the first remote host call. +.IP +.SH AUTHOR +Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. 
+.SH "REPORTING BUGS" +Report bugs to users@clusterlabs.org diff --git a/share/words.xml b/share/words.xml index e3e96bc7..d5f0ec0c 100644 --- a/share/words.xml +++ b/share/words.xml @@ -602,6 +602,7 @@ The error was: There was a problem with finding a common storage network between: [#!variable!node1_name!#] and: [#!variable!node2_name!#]. Found node 1 to have the IP: [#!variable!node1_ip!#] and node 2: [#!variable!node2_ip!#]. Is there a problem with '/etc/hosts'? Failed to find a network to use for storage replication. Is there a problem with '/etc/hosts'? + '.]]> @@ -1552,6 +1553,8 @@ Note: This is a permanent action! If you protect this server again later, a full Enabling the enable-safe-start daemon. Calling select ScanCore scan agents to ensure the database is updated. + Reload (adjust) a DRBD resource + This job is to reload (adjust) a DRBD resource. It's run as a job as it blocks until the adjust is run on all nodes. Starting: [#!variable!program!#]. @@ -2893,6 +2896,9 @@ Proceed? [y/N] The DRBD config file was not found. A protect job needs to be run from the Anvil! node hosting the server to be protected. Waiting a bit to make sure the file: [#!variable!file!#] is done uploading... Upload complete. + Picked up the special operation job. + Reloading (adjusting) the DRBD resource: [#!variable!resource!#]. This will not complete until all peers have also reloaded this resource. + DRBD resource: [#!variable!resource!#] has been reloaded. 
Normal Password diff --git a/tools/Makefile.am b/tools/Makefile.am index cd2b412f..b782df7a 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -37,6 +37,7 @@ dist_sbin_SCRIPTS = \ anvil-scan-network \ anvil-show-local-ips \ anvil-shutdown-server \ + anvil-special-operations \ anvil-sync-shared \ anvil-test-alerts \ anvil-update-definition \ diff --git a/tools/anvil-manage-server-storage b/tools/anvil-manage-server-storage index 95504d1b..1b0302b1 100755 --- a/tools/anvil-manage-server-storage +++ b/tools/anvil-manage-server-storage @@ -950,69 +950,52 @@ sub manage_disk_add } } + ### NOTE: The call to 'drbdadm adjust ' hangs, hard, until the same command is run on the peers. + ### To deal with this, we register jobs to run 'anvil-special-operations' on the peers, then we + ### call adjust here. # Adjust to start/connect. - my @pids; foreach my $host_type ("node", "dr") { foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}}) { my $host_uuid = $anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}{$short_host_name}{host_uuid}; - my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource; + my $shell_call = $anvil->data->{path}{exe}{'anvil-special-operations'}." --task refresh-drbd-resource --resource ".$drbd_resource.$anvil->Log->switches; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 's1:short_host_name' => $short_host_name, 's2:host_uuid' => $host_uuid, 's3:shell_call' => $shell_call, }}); + next if $host_uuid eq $anvil->Get->host_uuid; - ### NOTE: The 'adjust' call doesn't return until it's adjusted on all machines, so we - ### make these calls as background calls. - # Create the metadata, but don't exit on failure in case the metadata was created in - # a previous pass. 
- if ($host_uuid eq $anvil->Get->host_uuid) - { - print "- Adjusting the local resource: [".$drbd_resource."] to pick up the new config.\n"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); - ($output, $return_code) = $anvil->System->call({ - shell_call => $shell_call, - background => 1, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - output => $output, - return_code => $return_code, - }}); - } - else - { - # We'll use this in a minute to confirm connections. - $anvil->data->{peers}{$short_host_name}{host_uuid} = $host_uuid; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "peers::${short_host_name}::host_uuid" => $anvil->data->{peers}{$short_host_name}{host_uuid}, - }}); - - ### NOTE: This is expected to timeout when DR is used. - print "- Adjusting the peer: [".$short_host_name."]'s resource: [".$drbd_resource."] to pick up the new config.\n"; - my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip}; - my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network}; - my $shell_call = $anvil->data->{path}{exe}{drbdadm}." 
adjust ".$drbd_resource; - my ($pid) = $anvil->Remote->call({ - debug => 2, - background => 1, - shell_call => $shell_call, - target => $use_ip, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }}); - push @pids, $pid; - } + my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ + debug => 2, + job_command => $shell_call, + job_data => "adjust=".$drbd_resource, + job_name => "server::add_disk::rescan", + job_title => "job_0465", + job_description => "job_0466", + job_progress => 0, + job_host_uuid => $host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); + + print "- Registered a job with job UUID: [".$job_uuid."] to reload the resource config on the host: [".$short_host_name."].\n"; } } - # Wait for the remote PID(s) to be reaped. -# foreach my $pid (@pids) -# { -# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }}); -# next if not $pid; -# waitpid($pid, 0); -# } + print "- Adjusting the local resource: [".$drbd_resource."] to pick up the new config.\n"; + print "[ NOTE ] - If this hangs, make sure 'anvil-daemon' is running on the peers.\n"; + $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + ($output, $return_code) = $anvil->System->call({ + debug => 2, + background => 1, + shell_call => $shell_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); # Find which node is currently Primary and use that host to force primary to start sync. If none, # force here. 
@@ -1171,7 +1154,7 @@ sub manage_disk_add { my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip}; my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network}; - print " - The resource is primary onthe peer: [".$short_host_name."], forcing primary there via: [".$use_ip." (".$use_network.")]"; + print " - The resource is primary on the peer: [".$short_host_name."], forcing primary there via: [".$use_ip." (".$use_network.")]"; my ($output, $error, $return_code) = $anvil->Remote->call({ shell_call => $shell_call, target => $use_ip, @@ -1214,14 +1197,12 @@ sub manage_disk_add { print "Initial sync does not appear to be required.\n"; } - -# my $startup_needed = 1; -# my $local_role = defined $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} : ""; -# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_role => $local_role }}); - - - + =cut + my $startup_needed = 1; + my $local_role = defined $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_role => $local_role }}); + # Create the DRBD metadata. For this, we don't fail. foreach my $host_type ("node", "dr") { diff --git a/tools/anvil-special-operations b/tools/anvil-special-operations new file mode 100755 index 00000000..d434d196 --- /dev/null +++ b/tools/anvil-special-operations @@ -0,0 +1,120 @@ +#!/usr/bin/perl +# +# This program has no specific purpose. It's a general program for performing certain special tasks that +# can't be done otherwise in a reliable or efficient way. +# +# Exit codes; +# 0 = Normal exit. +# 1 = No database connection. 
+
+
+use strict;
+use warnings;
+use Anvil::Tools;
+require POSIX;
+use Text::Diff;
+use Data::Dumper;
+
+# Work out this program's file name and the directory it was invoked from.
+my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
+my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
+$running_directory    =~ s/^\./$ENV{PWD}/ if (($running_directory =~ /^\./) && ($ENV{PWD}));
+
+# Turn off output buffering so that anything we print is shown immediately.
+$| = 1;
+
+my $anvil = Anvil::Tools->new();
+
+# Read the switches; '--task' selects the operation to perform and '--resource' names the DRBD resource
+# that the 'refresh-drbd-resource' task acts on.
+$anvil->Get->switches({list => ["task", "resource"], man => $THIS_FILE});
+$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
+$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
+
+# We need a database connection. If there is none, log the error, wait ten seconds and exit with code 1.
+$anvil->Database->connect();
+$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
+unless ($anvil->data->{sys}{database}{connections})
+{
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0306"});
+	sleep 10;
+	$anvil->nice_exit({exit_code => 1});
+}
+
+# If we were started with '--job-uuid', record that this process has picked the job up.
+if ($anvil->data->{switches}{'job-uuid'})
+{
+	$anvil->Job->clear();
+	$anvil->Job->get_job_details({debug => 2});
+	$anvil->Job->update_progress({
+		message          => "message_0311",
+		progress         => 1,
+		job_picked_up_at => time,
+		job_picked_up_by => $$,
+	});
+}
+
+# Run the requested task. Any other '--task' value does nothing here and falls through to the exit below.
+refresh_drbd_resource($anvil) if ($anvil->data->{switches}{task} eq "refresh-drbd-resource");
+
+$anvil->nice_exit({exit_code => 0});
+
+
+#############################################################################################################
+# Functions                                                                                                 #
+#############################################################################################################
+
+# Calls 'drbdadm adjust' on the resource named by '--resource', asking 'System->call' to run it in the
+# background, and updates the job's progress before and after. This is needed when adding new volumes to an
+# existing resource, as the call to 'drbdadm adjust' won't return until it is run on all hosts. Every path
+# ends in 'nice_exit'; with exit code 1 when no resource was given, and with exit code 0 otherwise.
+sub refresh_drbd_resource
+{
+	my ($anvil) = @_;
+
+	my $drbd_resource = $anvil->data->{switches}{resource};
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $drbd_resource }});
+
+	unless ($drbd_resource)
+	{
+		# No resource was given, so mark the job as failed, report the error and exit.
+		$anvil->Job->update_progress({
+			job_status => "failed",
+			message    => "error_0419",
+			progress   => 100,
+		});
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0419"});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	$anvil->Job->update_progress({
+		message  => "message_0312,!!resource!".$drbd_resource."!!",
+		progress => 10,
+	});
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "message_0312", variables => { resource => $drbd_resource }});
+
+	# Build and run the adjust call. NOTE(review): 'background => 1' presumably means this returns before
+	# the adjust has finished, so the 100% update below may precede the actual reload - TODO confirm.
+	my $drbdadm    = $anvil->data->{path}{exe}{drbdadm};
+	my $shell_call = $drbdadm." adjust ".$drbd_resource;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
+	my ($output, $return_code) = $anvil->System->call({
+		background => 1,
+		shell_call => $shell_call,
+	});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		return_code => $return_code,
+		output      => $output,
+	}});
+
+	# Mark the job as complete.
+	$anvil->Job->update_progress({
+		message  => "message_0313,!!resource!".$drbd_resource."!!",
+		progress => 100,
+	});
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "message_0313", variables => { resource => $drbd_resource }});
+
+	$anvil->nice_exit({exit_code => 0});
+
+	return(0);
+}
\ No newline at end of file