From 29a28ee97a1fc55f036e09872e82c8036e114658 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 16 Aug 2022 19:01:36 -0400 Subject: [PATCH 1/2] * Fixed a bug with anvil-provision-server where running the command line menu from a Striker would not assign the job to the target Anvil!. * Updated Server->parse_definition() to check if a failed 'virsh list' output was passed in. Also changed it to not exit if the XML can't be parsed. Signed-off-by: Digimer --- Anvil/Tools/Server.pm | 12 +++++++++-- share/words.xml | 7 ++++++ tools/anvil-provision-server | 41 +++++++++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index b5e9c300..41f65efc 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -1362,6 +1362,14 @@ sub parse_definition return(1); } + # If whoever called us did so after a 'virsh dumpxml ' while the server was off, the "definition" + # will contain the string 'error: failed to get domain'. In such a case, return. + if ($definition =~ /error: failed to get domain/gs) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0367", variables => { definition => $definition }}); + return(1); + } + ### TODO: Switch this away from XML::Simple local $@; my $xml = XML::Simple->new(); @@ -1370,12 +1378,12 @@ sub parse_definition if (not $test) { chomp $@; - my $error = "[ Error ] - The was a problem parsing: [$definition]. The error was:\n"; + my $error = "[ Error ] - The was a problem parsing: [".$definition."]. 
The error was:\n"; $error .= "===========================================================\n"; $error .= $@."\n"; $error .= "===========================================================\n"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }}); - $anvil->nice_exit({exit_code => 1}); + return(1); } $anvil->data->{server}{$target}{$server}{$source}{parsed} = $server_xml; diff --git a/share/words.xml b/share/words.xml index f9731b44..6e12b0f8 100644 --- a/share/words.xml +++ b/share/words.xml @@ -512,6 +512,13 @@ The output, if any, was; [ Error ] - There was a problem purging records. The details of the problem should be in the logs. The table: [#!variable!table!#] has an entry in the history schema that doesn't have a corresponding record in the public schema. This is likely a resync artifact of a deleted record. Purging the record: [#!variable!uuid_column!#:#!variable!column_uuid!#] from all databases. [ Error ] - Failed to reconnect to the database, and now no connections remain. + ' for a server that was not running. + +The definition data passed in was: +==== +#!variable!definition!# +==== +]]> diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 76229bfc..2b572de6 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -1599,6 +1599,9 @@ sub check_anvil { my ($anvil) = @_; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::anvil_uuid" => $anvil->data->{new_server}{anvil_uuid}, + }}); if (($anvil->data->{switches}{anvil}) && (not $anvil->data->{switches}{'anvil-uuid'}) && (not $anvil->data->{switches}{'anvil-name'})) { if ($anvil->Validate->uuid({uuid => $anvil->data->{switches}{anvil}})) @@ -1618,8 +1621,15 @@ sub check_anvil } # Do we know or can we find the Anvil! UUID? - $anvil->data->{new_server}{anvil_uuid} = $anvil->data->{switches}{'anvil-uuid'} ? 
$anvil->data->{switches}{'anvil-uuid'} : ""; - $anvil->data->{new_server}{anvil_name} = $anvil->data->{switches}{'anvil-name'} ? $anvil->data->{switches}{'anvil-name'} : ""; + if (not $anvil->data->{new_server}{anvil_uuid}) + { + $anvil->data->{new_server}{anvil_uuid} = $anvil->data->{switches}{'anvil-uuid'} ? $anvil->data->{switches}{'anvil-uuid'} : ""; + $anvil->data->{new_server}{anvil_name} = $anvil->data->{switches}{'anvil-name'} ? $anvil->data->{switches}{'anvil-name'} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::anvil_uuid" => $anvil->data->{new_server}{anvil_uuid}, + "new_server::anvil_name" => $anvil->data->{new_server}{anvil_name}, + }}); + } if ((not $anvil->data->{new_server}{anvil_uuid}) && (not $anvil->data->{new_server}{anvil_name})) { @@ -2334,6 +2344,7 @@ sub interactive_ask_server_install_media $anvil->Database->get_files(); $anvil->Database->get_file_locations(); my $anvil_uuid = $anvil->data->{new_server}{anvil_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }}); # Create an array of install ISOs. my $install_isos = [""]; @@ -2427,6 +2438,7 @@ sub interactive_ask_server_driver_disc $anvil->Database->get_files(); $anvil->Database->get_file_locations(); my $anvil_uuid = $anvil->data->{new_server}{anvil_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }}); my $say_ram = $anvil->Convert->bytes_to_human_readable({"bytes" => $anvil->data->{new_server}{ram}}); my $storage_group_uuid = $anvil->data->{new_server}{storage_group}; @@ -2555,6 +2567,7 @@ sub interactive_ask_server_os my $say_install_media = $anvil->data->{files}{file_uuid}{$install_media_file_uuid}{file_name}; my $driver_disc_file_uuid = $anvil->data->{new_server}{driver_disc}; my $say_driver_disc = $driver_disc_file_uuid eq "none" ? 
"#!string!unit_0005!#" : $anvil->data->{files}{file_uuid}{$driver_disc_file_uuid}{file_name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }}); print $terminal->Tputs('cl'); print $anvil->Words->string({key => "job_0150"})."\n"; @@ -2624,6 +2637,10 @@ sub interactive_ask_server_confirm $anvil->Database->get_files(); $anvil->Database->get_file_locations(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::anvil_uuid" => $anvil->data->{new_server}{anvil_uuid}, + "switches::ci-test" => $anvil->data->{switches}{'ci-test'}, + }}); if ($anvil->data->{switches}{'ci-test'}) { ### NOTE: Show available options; @@ -2946,19 +2963,23 @@ sub interactive_ask_server_confirm $anvil->data->{new_server}{storage_size} = $anvil->data->{switches}{'storage-size'}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 's1:new_server::name' => $anvil->data->{new_server}{name}, - 's1:new_server::uuid' => $anvil->data->{new_server}{uuid}, - 's2:new_server::os' => $anvil->data->{new_server}{os}, - 's3:new_server::cpu' => $anvil->data->{new_server}{cpu}, - 's4:new_server::ram' => $anvil->data->{new_server}{ram}, - 's5:new_server::storage_group' => $anvil->data->{new_server}{storage_group}, - 's6:new_server::storage_size' => $anvil->data->{new_server}{storage_size}." 
(".$anvil->Convert->bytes_to_human_readable({"bytes" => $max_storage_group_size}).")", - 's7:new_server::install_media' => $anvil->data->{new_server}{install_media}, - 's8:new_server::driver_disc' => $anvil->data->{new_server}{driver_disc}, + 's2:new_server::uuid' => $anvil->data->{new_server}{uuid}, + 's3:new_server::os' => $anvil->data->{new_server}{os}, + 's4:new_server::cpu' => $anvil->data->{new_server}{cpu}, + 's5:new_server::ram' => $anvil->data->{new_server}{ram}, + 's6:new_server::storage_group' => $anvil->data->{new_server}{storage_group}, + 's7:new_server::storage_size' => $anvil->data->{new_server}{storage_size}." (".$anvil->Convert->bytes_to_human_readable({"bytes" => $max_storage_group_size}).")", + 's8:new_server::install_media' => $anvil->data->{new_server}{install_media}, + 's9:new_server::driver_disc' => $anvil->data->{new_server}{driver_disc}, }}); } my $anvil_uuid = $anvil->data->{new_server}{anvil_uuid}; my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:anvil_uuid' => $anvil_uuid, + 's2:node1_host_uuid' => $node1_host_uuid, + }}); my $storage_group_uuid = $anvil->data->{new_server}{storage_group}; my $say_ram = $anvil->Convert->bytes_to_human_readable({"bytes" => $anvil->data->{new_server}{ram}}); From 93e6a5984197253771b9e82cd0ce64e4eec01d67 Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 22 Aug 2022 13:38:46 -0400 Subject: [PATCH 2/2] * Added 'vnc-server' to the list of firewall services enabled on strikers. * Created the anvil-manage-dr man page. * Reworked anvil-manage-dr's --protect logic to search for which network works with the DR host, instead of assuming it's the SN. 
Signed-off-by: Digimer --- Anvil/Tools/Network.pm | 2 +- man/anvil-manage-dr.8 | 85 +++++++ share/words.xml | 21 +- tools/anvil-manage-dr | 527 +++++++++++++++++++++++------------------ 4 files changed, 404 insertions(+), 231 deletions(-) create mode 100644 man/anvil-manage-dr.8 diff --git a/Anvil/Tools/Network.pm b/Anvil/Tools/Network.pm index 82fdd94d..f6a1a792 100644 --- a/Anvil/Tools/Network.pm +++ b/Anvil/Tools/Network.pm @@ -4676,7 +4676,7 @@ sub _manage_striker_firewall # We open dhcp, tftp, and dns on the BCN for the install target feature. DNS is not currently # provided, but it should be added later. my $changes = 0; - my @services = ("audit", "http", "https", "postgresql", "ssh", "vnc-server", "zabbix-agent", "zabbix-server"); + my @services = ("audit", "http", "https", "postgresql", "ssh", "vnc-server", "zabbix-agent", "zabbix-server", "vnc-server"); my @bcn_services = ("dhcp", "dns", "tftp"); my @ifn_services = (); diff --git a/man/anvil-manage-dr.8 b/man/anvil-manage-dr.8 new file mode 100644 index 00000000..589d47bc --- /dev/null +++ b/man/anvil-manage-dr.8 @@ -0,0 +1,85 @@ +.\" Manpage for the Anvil! DR management tool +.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. +.TH anvil-manage-dr "8" "August 18 2022" "Anvil! Intelligent Availability™ Platform" +.SH NAME +anvil-manage-dr \- This program manages if (and when) a server is protected by a DR host. +.SH SYNOPSIS +.B anvil-manage-dr --server +\fI\, \/\fR[\fI\,options\/\fR] +.SH DESCRIPTION + +.TP +.SH OPTIONS +.TP +\-?, \-h, \fB\-\-help\fR +Show this man page. +.TP +\fB\-\-log-secure\fR +When logging, record sensitive data, like passwords. +.TP +\-v, \-vv, \-vvv +Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. +.SS "Commands:" +.TP +\fB\-\-connect\fR +Connect a server already on DR to its DR copy, update the data there if needed and begin streaming replication.
+.TP +\fB\-\-disconnect\fR +Disconnect a server from the DR image. This will end streaming replication. +.TP +\fB\-\-protect\fR +This sets up the server to be imaged on DR, if it isn't already protected. +.TP +Notes: If the server is not running, the DRBD resource volume(s) will be brought up. Both nodes need to be online and in the cluster. +.TP +\fB\-\-protocol\fR , default 'async' +This allows the protocol used to replicate data to the DR host to be configured. By default, 'async' is used. +.br +Modes: +.br +async (default) + +This tells the storage layer to consider the write to be completed once the data is on the active node's network transmit buffer. In this way, the DR host is allowed to fall behind a small amount, but the active nodes will not slow down because of higher network transit times to the DR location. +.br + NOTE: The transmit (TX) buffer size can be checked / updated with 'ethtool -g '. If the transmit buffer fills, storage will hold until the buffer flushes, causing periodic storage IO waits. You can increase the buffer size to a certain degree with 'ethtool -G tx ' (set on all storage network link devices on both nodes). For more information, see: + + https://www.linuxjournal.com/content/queueing-linux-network-stack + + or + + https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/monitoring-and-tuning-the-rx-ring-buffer_configuring-and-managing-networking + + If you set the maximum transmit buffer size and still run into IO waits, consider 'long-throw'. +.br +sync + + This tells the storage layer to consider the write complete when the data has reached the DR host's storage (when the data is committed to disk on DR). This means that the DR host will never fall behind.
However, if the DR's network latency is higher or the bandwidth to the DR is lower than that of the latency/bandwidth between the nodes, then total storage performance will be reduced to DR network speeds while DR is connected. + + This should be tested before being implemented in production. +.br +long-throw + + This is an option that requires an additional license fee to use. + + This option (based on LINBIT's DRBD Proxy) is designed for DR hosts that are connected over a wide-area network (or other cases where the connection to the DR is high-latency, low bandwidth or intermittently interrupted). It uses RAM on the host to act, effectively, as a very large transmit buffer. This requires allocating host RAM to the task, and so could reduce the RAM available to assign to servers. + + In this mode, the DR host is allowed to fall further behind production, but it significantly reduces (hopefully eliminates) how often node replication waits because of a full transmit buffer. + + The default size is 16 MiB, with a maximum size of 16 GiB. When the size is set to over 1 GiB, the size allocated to this buffer is accounted for when calculating available RAM that can be assigned to hosted servers. +.TP +\fB\-\-remove\fR +This removes the DR image from the DR host for the server, freeing up space on DR but removing the protection afforded by DR. +.TP +\fB\-\-server\fR (required) +This is the name or UUID of the server being worked on. +.TP +\fB\-\-update\fR +This tells the DR to be connected and sync. Once the volume(s) on DR are 'UpToDate', the connection is closed. This provides a point in time update of the server's image on DR. +.TP +\fB\-\-Yes\fR +Note the capital 'Y'. This can be set to proceed without confirmation. Use carefully with '\-\-protect' and '\-\-remove'! If the '\-\-job-uuid' is set, this is assumed and no prompt will be presented. +.IP +.SH AUTHOR +Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
+.SH "REPORTING BUGS" +Report bugs to users@clusterlabs.org diff --git a/share/words.xml b/share/words.xml index 6e12b0f8..8235128b 100644 --- a/share/words.xml +++ b/share/words.xml @@ -774,8 +774,8 @@ sys::manage::firewall = 1 ]]> Waiting until the server: [#!variable!server!#] appears. [ Error ] - Timed out waiting for the server: [#!variable!server!#] to appear! Waiting for the server: [#!variable!server!#] to appear. Will wait: [#!variable!time_left!#] more seconds. + Failed to access: [#!variable!host_name!#], will check again in: [#!variable!waiting!#] seconds. + There was a problem writing the new resource config file: [#!variable!file!#] on the host: [#!variable!host_name!#]. +When checking, a difference was found: +==== +#!variable!difference!# +==== + +The new version should have been: +==== +#!variable!new_resource_config!# +==== + +The version read in (if anything) was: +==== +#!variable!check_resource_config!# +==== + Starting: [#!variable!program!#]. diff --git a/tools/anvil-manage-dr b/tools/anvil-manage-dr index 3d6f3143..06aa28e0 100755 --- a/tools/anvil-manage-dr +++ b/tools/anvil-manage-dr @@ -18,6 +18,7 @@ use Anvil::Tools; require POSIX; use Term::Cap; use Text::Diff; +use Data::Dumper; my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; @@ -31,29 +32,9 @@ $| = 1; my $anvil = Anvil::Tools->new(); -# -$anvil->data->{switches}{'connect'} = ""; # connect an existing DR resource -$anvil->data->{switches}{disconnect} = ""; # disconnect -$anvil->data->{switches}{'job-uuid'} = ""; # Used later -$anvil->data->{switches}{protect} = ""; # Set -$anvil->data->{switches}{protocol} = ""; # "sync", "async" or "long-throw" -$anvil->data->{switches}{remove} = ""; # Set -$anvil->data->{switches}{server} = ""; # Name or UUID -$anvil->data->{switches}{update} = ""; # connects, if needed, and disconnects once UpToDate -$anvil->data->{switches}{Yes} = ""; # Set to avoid confirmation, not case sensitive 
-$anvil->Get->switches; -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0115", variables => { program => $THIS_FILE }}); -$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - 'switches::connect' => $anvil->data->{switches}{'connect'}, - 'switches::disconnect' => $anvil->data->{switches}{disconnect}, - 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, - 'switches::protect' => $anvil->data->{switches}{protect}, - 'switches::protocol' => $anvil->data->{switches}{protocol}, - 'switches::remove' => $anvil->data->{switches}{remove}, - 'switches::server' => $anvil->data->{switches}{server}, - 'switches::update' => $anvil->data->{switches}{update}, - 'switches::Yes' => $anvil->data->{switches}{Yes}, -}}); +$anvil->Get->switches({list => ["connect", "disconnect", "job-uuid", "protect", "protocol", "remove", "server", "update", "Yes"], man => $THIS_FILE}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Database->connect(); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); @@ -117,9 +98,10 @@ sub sanity_check # are ready. if (($anvil->data->{switches}{protect}) or ($anvil->data->{switches}{remove})) { - if (($host_type ne "node") or (not $anvil_uuid)) + # Make sure we're in an Anvil! (Node or DR Host) + if (not $anvil_uuid) { - # This must be run on a node active in the cluster hosting the server being managed. + # This must be run on a node active in the cluster hosting the server being managed. 
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0332"}); $anvil->Job->update_progress({ progress => 100, @@ -128,33 +110,37 @@ sub sanity_check }); $anvil->nice_exit({exit_code => 1}); } - - # Can we parse the CIB? - my ($problem) = $anvil->Cluster->parse_cib(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - if ($problem) - { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0336"}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0336", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); - $anvil->nice_exit({exit_code => 1}); - } - # Both nodes need to be in the cluster, are they? - if (not $anvil->data->{cib}{parsed}{'local'}{ready}) + # If we're a node, make sure we're in a cluster. + if ($host_type eq "node") { - # We're not a full member of the cluster yet. Please try again once we're fully in. Exiting. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0337"}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0337", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); + # Can we parse the CIB? + my ($problem) = $anvil->Cluster->parse_cib(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0336"}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0336", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + $anvil->nice_exit({exit_code => 1}); + } + + # Both nodes need to be in the cluster, are they? + if (not $anvil->data->{cib}{parsed}{'local'}{ready}) + { + # We're not a full member of the cluster yet. Please try again once we're fully in. 
Exiting. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0337"}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0337", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } } if ($anvil->data->{switches}{protect}) @@ -201,6 +187,7 @@ sub sanity_check } # Can we access DR, if we're not the DR host? + $anvil->Database->get_ip_addresses({debug => 2}); my $password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name}; @@ -248,39 +235,39 @@ sub sanity_check }); $anvil->nice_exit({exit_code => 1}); } - } - - ### TODO: We can queue a job to update the peer later, there's no real need, in the long run, for the - ### peer to be online. - # If we're protecting or removing a server from DR, the peer needs to be up. - if ((($anvil->data->{switches}{protect}) or - ($anvil->data->{switches}{remove}) or - ($anvil->data->{switches}{protocol})) && - (not $anvil->data->{cib}{parsed}{peer}{ready})) - { - if ($anvil->data->{switches}{protect}) - { - # We can't setup a server to be protected unless both nodes are up, and the peer - # isn't at this time. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0338"}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0338", - job_status => "failed", - }); - } - else + + ### TODO: We can queue a job to update the peer later, there's no real need, in the long run, for the + ### peer to be online. + # If we're protecting or removing a server from DR, the peer needs to be up. 
+ if ((($anvil->data->{switches}{protect}) or + ($anvil->data->{switches}{remove}) or + ($anvil->data->{switches}{protocol})) && + (not $anvil->data->{cib}{parsed}{peer}{ready})) { - # We can't remove a server from DR unless both nodes are up, and the peer isn't at - # this time. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0339"}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0339", - job_status => "failed", - }); + if ($anvil->data->{switches}{protect}) + { + # We can't setup a server to be protected unless both nodes are up, and the peer + # isn't at this time. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0338"}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0338", + job_status => "failed", + }); + } + else + { + # We can't remove a server from DR unless both nodes are up, and the peer isn't at + # this time. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0339"}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0339", + job_status => "failed", + }); + } + $anvil->nice_exit({exit_code => 1}); } - $anvil->nice_exit({exit_code => 1}); } # Verify we found the server. @@ -346,105 +333,8 @@ sub sanity_check ($anvil->data->{switches}{help}) or ($anvil->data->{switches}{h})) { - print " -What do you want to do? - -Options (all require --server ); - - --connect - - Connect a server already on DR to it's DR copy, update the data there if needed and begin streaming - replication. - - --disconnect - - Disconnect a server from the DR image. This will end streaming replication. - - --protect - - The sets up the server to be imaged on DR, if it isn't already protected. - - Notes: If the server is not running, the DRBD resource volume(s) will be brought up. Both nodes need - to be online and in the cluster. 
- - --protocol , default 'async' - - This allows the protocol used to replicate data to the DR host to be configured. By default, 'async' - is used. - - Modes: - - async (default) - - This tells the storage layer to consider the write to be completed once the data is on the - active node's network transmit buffer. In this way, the DR host is allowed to fall behind a - small amount, but the active nodes will not slow down because of higher network transit times - to the DR location. - - NOTE: The transmit (TX) buffer size can be checked / updated with 'ethtool -g '. - If the transmit buffer fills, storage will hold until the buffer flushes, causing - periodic storage IO waits. You can increase the buffer size to a certain degree with - 'ethtool -G tx ' (set on all storage network link devices on both - nodes. For more information, see: - - https://www.linuxjournal.com/content/queueing-linux-network-stack - - or - - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/monitoring-and-tuning-the-rx-ring-buffer_configuring-and-managing-networking - - If you set the maximum transmit buffer size and still run into IO waits, consider - 'long-throw'. - - sync - - This tells the storage layer to consider the write complete when the data has reached the DR - host's storage (when the data is committed to disk on DR). This means that the DR host will - never fall behind. However, if the DR's network latency is higher or the bandwidth to the DR - is lower than that of the latency/bandwidth between the nodes, then total storage performance - will be reduced to DR network speeds while DR is connected. - - This should be tested before implemented in production. - - long-throw - - This is an option that requires an additional license fee to use. 
- - This option (based on LINBIT's DRBD Proxy) and is designed for DR hosts that are connected - over a wide-area network (or other cases where the connection to the DR is high-latency, low - bandwidth or intermittently interrupted). It uses RAM on the host to act, effectively, as a - very large transmit buffer. This requires allocating host RAM to the task, and so could - reduces the available RAM assignable to assign to servers. - - In this mode, the DR host is allowed to fall further behind production, but it significantly - reduces (hopefully eliminates) how often node replication waits because of a full transmit - buffer. - - The default size is 16 MiB, with a maximum size of 16 GiB. When the size is set to over - 1 GiB, the size allocated to this buffer is accounted for when calculating available RAM that - can be assigned to hosted servers. - - --remove - - This removes the DR image from the DR host for the server, freeing up space on DR but removing the - protection afforded by DR. - - --server - - This is the name or UUID of the server being worked on. - - --update - - This tells the DR to be connected and sync, Once the volume(s) on DR are 'UpToDate', the connection - is closed. This provides a point in time update of the server's image on DR. - - --Yes - - Note the capital 'Y'. This can be set to proceed without confirmation. Use carefully with '--protect' - and '--remove'! If the '--job-uuid' is set, this is assumed and no prompt will be presented. - -Exiting. -"; + # Show the man page. + system($anvil->data->{path}{exe}{man}." ".$THIS_FILE); if (($anvil->data->{switches}{help}) or ($anvil->data->{switches}{h})) { $anvil->nice_exit({exit_code => 0}); @@ -461,7 +351,96 @@ Exiting. progress => 20, message => "job_0359", }); - + + # Loop until we have access to both the peer machines. + my $waiting = 1; + my $wait_for = 10; + while ($waiting) + { + # This will get set to 1 if we have to keep waiting. 
+ $waiting = 0; + my $password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + password => $anvil->Log->is_secure($password), + node1_host_uuid => $node1_host_uuid, + node2_host_uuid => $node2_host_uuid, + dr1_host_uuid => $dr1_host_uuid, + }}); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $this_host_name = $anvil->Get->host_name_from_uuid({host_uuid => $this_host_uuid}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + this_host_uuid => $this_host_uuid, + this_host_name => $this_host_name, + }}); + # We'll try the SN, then the BCN and finally the IFN to see which, if any, network we + # can reach the peer on. This is needed because the DR host could be on a totally + # different network. 
+ $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip} = ""; + $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_subnet} = ""; + $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_network} = ""; + foreach my $check_network ("sn", "bcn", "mn", "ifn") + { + last if $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { check_network => $check_network }}); + foreach my $this_network (sort {$a cmp $b} keys %{$anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}}) + { + next if $this_network !~ /^$check_network/; + my $this_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{$this_network}{ip_address}; + my $this_subnet = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{$this_network}{subnet_mask}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_ip' => $this_ip, + 's2:this_network' => $this_network, + 's3:this_subnet' => $this_subnet, + }}); + + # Test access. 
+ my $access = $anvil->Remote->test_access({ + target => $this_ip, + password => $password, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }}); + if ($access) + { + $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip} = $this_ip; + $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_subnet} = $this_subnet; + $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_network} = $this_network; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "hosts::host_uuid::${this_host_uuid}::network::use_ip" => $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}, + "hosts::host_uuid::${this_host_uuid}::network::use_subnet" => $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_subnet}, + "hosts::host_uuid::${this_host_uuid}::network::use_network" => $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_network}, + }}); + } + } + } + + if (not $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}) + { + # No access + my $variables = { + waiting => $wait_for, + host_name => $this_host_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0404", variables => $variables}); + $anvil->Job->update_progress({ + progress => 12, + message => "job_0404", + variables => $variables, + }); + + $waiting = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + } + } + + # If we're waiting for a peer, record as such. + sleep $wait_for; + } + # If we're protecting, make sure there's enough space on the DR host. 
if ($anvil->data->{switches}{protect}) { @@ -608,8 +587,14 @@ sub process_update # "Peer" in this context is either/both nodes next if $this_host_uuid eq $anvil->Get->host_uuid(); my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; - my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; - my $variables = { host_name => $peer_host_name }; + my $peer_ip = $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_host_uuid' => $this_host_uuid, + 's2:peer_host_name' => $peer_host_name, + 's3:peer_ip' => $peer_ip, + }}); + + my $variables = { host_name => $peer_host_name }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0389", variables => $variables}); $anvil->Job->update_progress({ progress => 70, @@ -617,7 +602,7 @@ sub process_update variables => $variables, }); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $peer_sn_ip, + target => $peer_ip, password => $anvil_password, shell_call => $drbd_up_call, }); @@ -1003,7 +988,13 @@ sub process_connect # "Peer" in this context is either/both nodes next if $this_host_uuid eq $anvil->Get->host_uuid(); my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; - my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $peer_ip = $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_host_uuid' => $this_host_uuid, + 's2:peer_host_name' => $peer_host_name, + 's3:peer_ip' => $peer_ip, + }}); + $variables = { host_name => $peer_host_name }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0389", variables => $variables}); 
$anvil->Job->update_progress({ @@ -1012,7 +1003,7 @@ sub process_connect variables => $variables, }); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $peer_sn_ip, + target => $peer_ip, password => $anvil_password, shell_call => $drbd_up_call, }); @@ -1160,6 +1151,7 @@ sub process_protect $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_ram => $anvil->Convert->add_commas({number => $server_ram})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram}).")", }}); + foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$short_host_name}{$server_name}{drbd}{resource}}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }}); @@ -1512,6 +1504,11 @@ sub process_protect $job_data .= "protect=1\n"; $job_data .= "protocol=".$anvil->data->{switches}{protocol}."\n"; + # We always record the job against node 1, as it has to use cluster info to run this, so we + # can't run it on the DR itself. + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node1_host_uuid => $node1_host_uuid }}); + # Register the job with this host my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ debug => 2, @@ -1521,7 +1518,7 @@ sub process_protect job_title => "job_0384", job_description => "job_0385", job_progress => 0, - job_host_uuid => $anvil->Get->host_uuid, + job_host_uuid => $node1_host_uuid, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); @@ -1649,29 +1646,44 @@ sub process_protect } } - ### The connections. 
- $anvil->Database->get_ip_addresses({debug => 2}); - my $node1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{network}{sn1}{ip_address}; - my $node2_sn1_ip = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{network}{sn1}{ip_address}; - my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address}; - my $dr_protocol = "A"; + # Refresh the IP info (usually scrubbed by this point) + $anvil->Database->get_ip_addresses(); + + # The connections. Node 1 to 2 always uses the SN, either node to DR uses the network we reached the DR host on + my $storage_network = "sn1"; + my $dr_network = $anvil->data->{lookup}{host_uuid}{$dr1_host_uuid}{network}{use_network}; + my $dr1_ip = $anvil->data->{lookup}{host_uuid}{$dr1_host_uuid}{network}{use_ip}; + my $node1_sn_ip = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{network}{$storage_network}{ip_address}; + my $node1_dr_ip = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{network}{$dr_network}{ip_address}; + my $node2_sn_ip = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{network}{$storage_network}{ip_address}; + my $node2_dr_ip = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{network}{$dr_network}{ip_address}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's01:storage_network' => $storage_network, + 's02:dr1_host_uuid' => $dr1_host_uuid, + 's03:dr_network' => $dr_network, + 's04:dr1_ip' => $dr1_ip, + 's05:node1_host_uuid' => $node1_host_uuid, + 's06:node1_sn_ip' => $node1_sn_ip, + 's07:node1_dr_ip' => $node1_dr_ip, + 's08:node2_host_uuid' => $node2_host_uuid, + 's09:node2_sn_ip' => $node2_sn_ip, + 's10:node2_dr_ip' => $node2_dr_ip, + }}); + + # Choose the DR protocol + my $dr_protocol = "A"; if ($anvil->data->{switches}{protocol} eq "sync") { $dr_protocol = "C"; } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - node1_sn1_ip => $node1_sn1_ip, - node2_sn1_ip => $node2_sn1_ip, - dr1_sn1_ip => $dr1_sn1_ip, - dr_protocol =>
$dr_protocol, - }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dr_protocol => $dr_protocol }}); - # Node 1 to Node 2 first, then n + # Node 1 to Node 2 first my $connections = $anvil->Words->string({key => "file_0005", variables => { host1_short_name => $node1_short_host_name, - host1_sn_ip => $node1_sn1_ip, + host1_ip => $node1_sn_ip, host2_short_name => $node2_short_host_name, - host2_sn_ip => $node2_sn1_ip, + host2_ip => $node2_sn_ip, tcp_port => $nodes_tcp_port, 'c-rate-maximum' => 500, protocol => "C", @@ -1681,9 +1693,9 @@ sub process_protect # Node 1 to DR $connections .= $anvil->Words->string({key => "file_0005", variables => { host1_short_name => $node1_short_host_name, - host1_sn_ip => $node1_sn1_ip, + host1_ip => $node1_dr_ip, host2_short_name => $dr1_short_host_name, - host2_sn_ip => $dr1_sn1_ip, + host2_ip => $dr1_ip, tcp_port => $node1_to_dr_port, 'c-rate-maximum' => 500, protocol => $dr_protocol, @@ -1693,9 +1705,9 @@ sub process_protect # Node 2 to DR $connections .= $anvil->Words->string({key => "file_0005", variables => { host1_short_name => $node2_short_host_name, - host1_sn_ip => $node2_sn1_ip, + host1_ip => $node2_dr_ip, host2_short_name => $dr1_short_host_name, - host2_sn_ip => $dr1_sn1_ip, + host2_ip => $dr1_ip, tcp_port => $node2_to_dr_port, 'c-rate-maximum' => 500, protocol => $dr_protocol, @@ -1713,6 +1725,8 @@ sub process_protect my $difference = diff \$old_resource_config, \$new_resource_config, { STYLE => 'Unified' }; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { difference => $difference }}); + # Is there a difference on the local version? There might be a difference on peers, but we'll deal + # with that below. if (not $difference) { # The resource file doesn't need to be updated.
@@ -1869,16 +1883,24 @@ sub process_protect progress => 72, message => "job_0368", }); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) { - # "Peer" in this context is either a node or a DR host + # "Peer" in this context is either a node or a DR host. It's not uncommon for the DR host to + # not have a connection over the SN or even the BCN. So we'll use the IFN1 to move files. next if $this_host_uuid eq $anvil->Get->host_uuid(); my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; - my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $peer_ip = $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_host_uuid' => $this_host_uuid, + 's2:peer_host_name' => $peer_host_name, + 's3:peer_ip' => $peer_ip, + }}); + my $variables = { file => $config_file, host_name => $peer_host_name, - ip_address => $peer_sn_ip, + ip_address => $peer_ip, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0369", variables => $variables}); $anvil->Job->update_progress({ @@ -1895,10 +1917,41 @@ sub process_protect user => "root", group => "root", mode => "0644", - target => $peer_sn_ip, + target => $peer_ip, password => $anvil_password, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + + # Make sure the file exists now. 
+ my $check_resource_config = $anvil->Storage->read_file({ + file => $config_file, + target => $peer_ip, + password => $anvil_password, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { check_resource_config => $check_resource_config }}); + + my $difference = diff \$new_resource_config, \$check_resource_config, { STYLE => 'Unified' }; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { difference => $difference }}); + + # Failed to write the file. + if ($difference) + { + $variables = { + host_name => $peer_host_name, + file => $config_file, + difference => $difference, + new_resource_config => $new_resource_config, + check_resource_config => $check_resource_config, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0405", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "job_0405", + variables => $variables, + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } } # Immediately call scan-drbd on all machines to ensure that if another run is about to happen for a @@ -1921,7 +1974,13 @@ sub process_protect # "Peer" in this context is either a node or a DR host next if $this_host_uuid eq $anvil->Get->host_uuid(); my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; - my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $peer_ip = $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_host_uuid' => $this_host_uuid, + 's2:peer_host_name' => $peer_host_name, + 's3:peer_ip' => $peer_ip, + }}); + my $variables = { host_name => $peer_host_name }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0382", variables => $variables}); 
$anvil->Job->update_progress({ @@ -1930,7 +1989,7 @@ sub process_protect variables => $variables, }); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $peer_sn_ip, + target => $peer_ip, password => $anvil_password, shell_call => $scan_drbd_call, }); @@ -1950,12 +2009,12 @@ sub process_protect my $create_md = 0; foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$server_name}}) { - my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address}; - my $lv_path = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lv_path}; + my $dr1_ip = $anvil->data->{lookup}{host_uuid}{$dr1_host_uuid}{network}{use_ip}; + my $lv_path = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lv_path}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - volume => $volume, - dr1_sn1_ip => $dr1_sn1_ip, - lv_path => $lv_path, + 's1:volume' => $volume, + 's2:dr1_ip' => $dr1_ip, + 's3:lv_path' => $lv_path, }}); my $variables = { @@ -1978,7 +2037,7 @@ else fi"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lv_check_call => $lv_check_call }}); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $dr1_sn1_ip, + target => $dr1_ip, password => $anvil_password, shell_call => $lv_check_call, }); @@ -2003,7 +2062,7 @@ fi"; my $lvcreate_call = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lvcreate_call}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lvcreate_call => $lvcreate_call }}); ($output, $error, $return_code) = $anvil->Remote->call({ - target => $dr1_sn1_ip, + target => $dr1_ip, password => $anvil_password, shell_call => $lvcreate_call, }); @@ -2016,7 +2075,7 @@ fi"; sleep 1; # Does it exist now? 
($output, $error, $return_code) = $anvil->Remote->call({ - target => $dr1_sn1_ip, + target => $dr1_ip, password => $anvil_password, shell_call => $lv_check_call, }); @@ -2044,14 +2103,14 @@ fi"; if ($create_md) { - my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address}; + my $dr1_ip = $anvil->data->{lookup}{host_uuid}{$dr1_host_uuid}{network}{use_ip}; my $drbd_md_call = $anvil->data->{path}{exe}{drbdadm}." --force create-md --max-peers=3 ".$server_name; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - dr1_sn1_ip => $dr1_sn1_ip, - drbd_md_call => $drbd_md_call, + 's1:dr1_ip' => $dr1_ip, + 's2:drbd_md_call' => $drbd_md_call, }}); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $dr1_sn1_ip, + target => $dr1_ip, password => $anvil_password, shell_call => $drbd_md_call, }); @@ -2080,7 +2139,13 @@ fi"; # "Peer" in this context is either a node or a DR host next if $this_host_uuid eq $anvil->Get->host_uuid(); my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; - my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $peer_ip = $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_host_uuid' => $this_host_uuid, + 's2:peer_host_name' => $peer_host_name, + 's3:peer_ip' => $peer_ip, + }}); + my $variables = { server => $server_name, host_name => $peer_host_name, @@ -2092,7 +2157,7 @@ fi"; variables => $variables, }); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $peer_sn_ip, + target => $peer_ip, password => $anvil_password, shell_call => $shell_call, }); @@ -2123,7 +2188,13 @@ fi"; # "Peer" in this context is either a node or a DR host next if $this_host_uuid eq $anvil->Get->host_uuid(); my $peer_host_name = 
$anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; - my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $peer_ip = $anvil->data->{lookup}{host_uuid}{$this_host_uuid}{network}{use_ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:this_host_uuid' => $this_host_uuid, + 's2:peer_host_name' => $peer_host_name, + 's3:peer_ip' => $peer_ip, + }}); + $variables = { host_name => $peer_host_name }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0376", variables => $variables}); $anvil->Job->update_progress({ @@ -2132,7 +2203,7 @@ fi"; variables => $variables, }); my ($output, $error, $return_code) = $anvil->Remote->call({ - target => $peer_sn_ip, + target => $peer_ip, password => $anvil_password, shell_call => $drbd_up_call, });