From 14bf323627f09407cb58c780301bb6df110c9333 Mon Sep 17 00:00:00 2001 From: Digimer Date: Thu, 20 Aug 2020 06:32:19 -0400 Subject: [PATCH] * Fixed an issue with ocf:alteeve:server where, after a migration, the target host would invoke the RA as if it was trying to migrate, instead of verifying the server (resource) was OK post migration. * Fixed a bug in Server->get_status() where the call to Storage->rsync's returned output checked for '!!errer!!' instead of '!!error!!'. * Fixed a bug in Storage->rsync where, when no port was passed in, it would try to specify an empty port and fail. Signed-off-by: Digimer --- Anvil/Tools/Remote.pm | 2 +- Anvil/Tools/Server.pm | 6 ++++- Anvil/Tools/Storage.pm | 7 ++++++ Anvil/Tools/System.pm | 2 +- notes | 10 ++++---- ocf/alteeve/server | 53 ++++++++++++++++++++++++++---------------- share/words.xml | 2 ++ 7 files changed, 54 insertions(+), 28 deletions(-) diff --git a/Anvil/Tools/Remote.pm b/Anvil/Tools/Remote.pm index 70433b0a..30e3bbbc 100644 --- a/Anvil/Tools/Remote.pm +++ b/Anvil/Tools/Remote.pm @@ -748,7 +748,7 @@ sub test_access my $anvil = $self->parent; my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; - my $password = defined $parameter->{password} ? $parameter->{password} : $anvil->data->{sys}{root_password}; + my $password = defined $parameter->{password} ? $parameter->{password} : ""; my $port = defined $parameter->{port} ? $parameter->{port} : 22; my $target = defined $parameter->{target} ? $parameter->{target} : ""; my $user = defined $parameter->{user} ? 
$parameter->{user} : getpwuid($<); diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index d8cf297b..82b9e1e5 100755 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -421,7 +421,7 @@ sub get_status $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "server::${host}::${server}::from_disk::xml" => $anvil->data->{server}{$host}{$server}{from_disk}{xml}, }}); - if (($anvil->data->{server}{$host}{$server}{from_disk}{xml} eq "!!errer!!") or (not $anvil->data->{server}{$host}{$server}{from_disk}{xml})) + if (($anvil->data->{server}{$host}{$server}{from_disk}{xml} eq "!!error!!") or (not $anvil->data->{server}{$host}{$server}{from_disk}{xml})) { # Failed to read it. $anvil->data->{server}{$host}{$server}{from_disk}{xml} = ""; @@ -707,6 +707,10 @@ sub migrate # Call the migration now my ($output, $return_code) = $anvil->System->call({shell_call => $migration_command}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); if ($return_code) { # Something went wrong. diff --git a/Anvil/Tools/Storage.pm b/Anvil/Tools/Storage.pm index c374001e..0653adb2 100644 --- a/Anvil/Tools/Storage.pm +++ b/Anvil/Tools/Storage.pm @@ -2307,6 +2307,13 @@ sub rsync try_again => $try_again, }}); + # Make sure the port is sane; + if ((not $port) or ($port =~ /\D/) or ($port < 0) or ($port > 65535)) + { + $port = 22; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { port => $port }}); + } + # Add an argument for the port if set if ($port ne "22") { diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index 877b1159..2937dbf9 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -1187,7 +1187,7 @@ sub check_storage This uses the host information along with the Anvil! the host is in to find and configure the local IPMI BMC. 
-If this host is not in an Anvil!, or if the host is in an Anvil!, but no IPMI BMC was found, or any other issue arises, C<< 0 >> is returned. If there is any problem, C<< !!errer!! >> will be returned. +If this host is not in an Anvil!, or if the host is in an Anvil!, but no IPMI BMC was found, or any other issue arises, C<< 0 >> is returned. If there is any problem, C<< !!error!! >> will be returned. If a BMC is found and configured, the C<< fence_ipmilan >> call used to check the status is stored in C<< hosts >> -> C<< host_ipmi >>, and the same string is returned. diff --git a/notes b/notes index b2492d2c..d25ed642 100644 --- a/notes +++ b/notes @@ -293,9 +293,9 @@ pcs resource clone hypervisor clone-max=2 notify="false" pcs resource create drbd systemd:drbd op monitor interval=60 pcs resource clone drbd clone-max=2 notify="false" -pcs resource create srv01-sql ocf:alteeve:server name="srv01-sql" meta allow-migrate="true" op monitor interval="60" on-fail="block" +pcs resource create srv07-el6 ocf:alteeve:server name="srv07-el6" meta allow-migrate="true" op monitor interval="60" on-fail="block" - or - -pcs resource update srv01-sql ocf:alteeve:server name="srv01-sql" meta allow-migrate="true" op monitor interval="60" on-fail="block" +pcs resource update srv07-el6 ocf:alteeve:server name="srv07-el6" meta allow-migrate="true" op monitor interval="60" on-fail="block" # Test stonith_admin --fence el8-a01n02 --verbose; crm_error $? 
@@ -912,13 +912,13 @@ virt-install --connect qemu:///system \ # Migration; -pcs resource move srv01-sql +pcs resource move srv07-el6 mk-a02n02 # (then remove the constraint); pcs constraint show --full -pcs constraint remove cli-ban-srv01-sql-on-el8-a01n01 +pcs constraint remove cli-ban-srv07-el6-on-mk-a02n01 -pcs constraint remove $(pcs constraint show --full | grep ban-srv01-sql | perl -pe 's/^.*?id:(.*?)\)/$1/') +pcs constraint remove $(pcs constraint show --full | grep ban-srv07-el6 | perl -pe 's/^.*?id:(.*?)\)/$1/') DRBD 9 - Check; diff --git a/ocf/alteeve/server b/ocf/alteeve/server index 35df820d..0151fed5 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -211,11 +211,17 @@ if (not $anvil->data->{switches}{monitor}) # help  - (usage maps here) Displays a usage message when the resource agent is invoked from the command line, rather than by the cluster manager. # notify  - Inform resource about changes in state of other clones. -if (($anvil->data->{switches}{migrate_to}) or ($anvil->data->{switches}{migrate_from})) +if ($anvil->data->{switches}{migrate_to}) { # We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3) migrate_server($anvil); } +elsif ($anvil->data->{switches}{migrate_from}) +{ + # This is called after a migration is complete, so we're basically just doing a status check. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0529", variables => { server => $anvil->data->{environment}{OCF_RESKEY_name} }}); + server_status($anvil); +} elsif ($anvil->data->{switches}{start}) { # Start the server @@ -659,7 +665,7 @@ sub start_server start_drbd_resource($anvil); # Still alive? Boot! - my ($success) = $anvil->Server->boot({debug => 2, server => $server}); + my ($success) = $anvil->Server->boot({debug => 3, server => $server}); if ($success) { # Success! @@ -709,7 +715,7 @@ sub stop_drbd_resource # Bring the peer's resource down. 
$anvil->DRBD->manage_resource({ - debug => 2, + debug => 3, resource => $resource, task => "down", target => $peer_ip, @@ -717,7 +723,7 @@ sub stop_drbd_resource # Bring the local resource down $anvil->DRBD->manage_resource({ - debug => 2, + debug => 3, resource => $resource, task => "down", }); @@ -946,7 +952,7 @@ sub stop_server # Read in an parse the server's XML. $anvil->System->check_storage({debug => 3}); - $anvil->Server->get_status({debug => 2, server => $server}); + $anvil->Server->get_status({debug => 3, server => $server}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0313", variables => { server => $server }}); my $success = $anvil->Server->shutdown({debug => 3, server => $server}); @@ -1159,17 +1165,6 @@ sub migrate_server { my ($anvil) = @_; - # Before migrating, make sure the daemons are running on the peer. - check_daemons($anvil, "start"); - - # Make sure switches are at least defined. - $anvil->data->{switches}{migrate_to} = "" if not defined $anvil->data->{switches}{migrate_to}; - $anvil->data->{switches}{migrate_from} = "" if not defined $anvil->data->{switches}{migrate_from}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - 'switches::migrate_to' => $anvil->data->{switches}{migrate_to}, - 'switches::migrate_from' => $anvil->data->{switches}{migrate_from}, - }}); - ### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a ### user might want to do this (ie: sync will be done soon and the need to evacuate the node ### ASAP is high). Maybe we'll enforce this and require a '--force' switch later? @@ -1190,6 +1185,23 @@ sub migrate_server meta_on_node => $meta_on_node, }}); + # Make sure switches are at least defined. 
+ $anvil->data->{switches}{migrate_to} = "" if not defined $anvil->data->{switches}{migrate_to}; + $anvil->data->{switches}{migrate_from} = "" if not defined $anvil->data->{switches}{migrate_from}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::migrate_to' => $anvil->data->{switches}{migrate_to}, + 'switches::migrate_from' => $anvil->data->{switches}{migrate_from}, + }}); + + # Log what we're doing. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0528", variables => { + server => $server, + target_host => $target, + }}); + + # Before migrating, make sure the daemons are running on the peer. + check_daemons($anvil, "start"); + # The actual migration command will involve enabling dual primary, then beginning the migration. The # virsh call will depend on if we're pushing or pulling. Once the migration completes, regardless of # success or failure, dual primary will be disabled again. @@ -1347,7 +1359,7 @@ sub migrate_server # If we're still alive, we're ready to migrate. ($migrated) = $anvil->Server->migrate({ - debug => 2, + debug => 3, server => $server, source => $source, target => $target @@ -1356,6 +1368,7 @@ sub migrate_server } elsif ($source) { + ### NOTE: Pacemaker doesn't seem to ever pull servers. # Pull the server here. Start by verifying it's on the 'meta_on_node' host. # Scan locally and on our peer $anvil->Server->find({debug => 3}); @@ -1401,7 +1414,7 @@ sub migrate_server # Call the pull migation. ($migrated) = $anvil->Server->migrate({ - debug => 2, + debug => 3, server => $server, source => $source, target => $target @@ -1440,8 +1453,8 @@ sub validate_all }}); # Read in an parse the server's XML. 
- $anvil->System->check_storage({debug => 2}); - $anvil->Server->get_status({debug => 2, server => $server}); + $anvil->System->check_storage({debug => 3}); + $anvil->Server->get_status({debug => 3, server => $server}); # Is the name in the definition file what we expect (and did we read the XML data at all)? validate_name($anvil); diff --git a/share/words.xml b/share/words.xml index 862f606b..5b58e9c4 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1005,6 +1005,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is: The server is shutting down. Will wait for it to finish... The server is off. The server is running (state is: [#!variable!state!#]). + We've been asked to migrate the server: [#!variable!server!#] to: [#!variable!target_host!#]. + Checking server state after: [#!variable!server!#] was migrated to this host. The host name: [#!variable!target!#] does not resolve to an IP address.