From f52d8196f600fe9262256045b91295dc20ef0791 Mon Sep 17 00:00:00 2001
From: Digimer
Date: Wed, 21 Feb 2018 02:06:00 -0500
Subject: [PATCH] * Migration is now sort of working. There is still an issue
 to sort out with enabling drbd dual-primary, but servers can move in some
 cases now.

* Changed fence_pacemaker to exit with '1' on generic error as per LINBIT's comments.

Signed-off-by: Digimer
---
 notes                 |  45 +++++++-
 ocf/alteeve/server    | 239 ++++++++++++++++++++++++++++++++++++++----
 tools/fence_pacemaker |   5 +-
 3 files changed, 261 insertions(+), 28 deletions(-)

diff --git a/notes b/notes
index c6e63e39..97d9d64e 100644
--- a/notes
+++ b/notes
@@ -250,10 +250,17 @@ drbdsetup show all --show-defaults
 * Migate;
 # For all resources under the server;
 #drbdadm net-options r0 --allow-two-primaries=yes
+ drbdsetup net-options srv01-c7_0 2 --_name=m3-a02n01.alteeve.com --csums-alg=md5 --data-integrity-alg=md5 --after-sb-0pri=discard-zero-changes --after-sb-1pri=discard-secondary --after-sb-2pri=disconnect --protocol=C --fencing=resource-and-stonith --allow-two-primaries=yes
 drbdsetup net-options srv01-c7_0 2 --_name=m3-a02n01.alteeve.com --csums-alg=md5 --data-integrity-alg=md5 --after-sb-0pri=discard-zero-changes --after-sb-1pri=discard-secondary --after-sb-2pri=disconnect --protocol=C --fencing=resource-and-stonith --allow-two-primaries=yes
+
 # Migrate:
-virsh migrate --unsafe --undefinesource --live srv01-c7 qemu+ssh://m3-a01n02.alteeve.com/system
+
+virsh -c qemu+ssh://root@m3-a02n02.alteeve.com/system list
+
+virsh migrate --unsafe --undefinesource --live srv01-c7 qemu+ssh://m3-a02n01.alteeve.com/system
+virsh -c qemu+ssh://root@m3-a02n02.alteeve.com/system migrate --undefinesource --live srv01-c7 qemu+ssh://m3-a02n01.alteeve.com/system
+
 # Again for all resource under the server;
 drbdadm net-options r0 --allow-two-primaries=no
 
 
@@ -261,6 +268,12 @@ drbdsetup net-options --_name= --a
 virsh migrate --undefinesource --live qemu+ssh:///system
 drbdsetup net-options --_name= --allow-two-primaries=no
 
+pcs constraint list --full
+Location Constraints:
+  Resource: srv01-c7
+    Enabled on: m3-a02n02.alteeve.com (score:50) (id:location-srv01-c7-m3-a02n02.alteeve.com-50)
+pcs constraint remove location-srv01-c7-m3-a02n02.alteeve.com-50
+
 
 Set to 90% of BCN bandwidth
 
@@ -412,9 +425,11 @@ mkdir /mnt/anvil/{provision,files,archive,definitions}
 pcs resource create srv01-c7 ocf:alteeve:server hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10" op stop timeout="60" on-fail="block" meta allow-migrate="true" failure-timeout="75"
 pcs resource create srv01-c7 ocf:alteeve:server hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10" op on-fail="block" meta allow-migrate="true" failure-timeout="75"
-pcs resource create srv01-c7 ocf:alteeve:server name="srv01-c7" meta allow-migrate="true" op monitor interval="10" op stop on-fail="block" meta allow-migrate="true" failure-timeout="75"
+pcs resource create srv01-c7 ocf:alteeve:server name="srv01-c7" meta allow-migrate="true" op monitor interval="60" op stop on-fail="block" op migrate_to on-fail="block" op migrate_from on-fail="block" meta allow-migrate="true" failure-timeout="75"
+
+
 
 == Resource Agent; https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc
 
 * A resource agent receives all configuration information about the resource it manages via environment variables.
   The names of these environment variables are always the name of the resource parameter, prefixed with OCF_RESKEY_. For example, if the resource has an ip parameter set to 192.168.1.1, then the resource agent will have access to an environment variable OCF_RESKEY_ip holding that value.
 
@@ -438,5 +453,29 @@ Migrate servers;
 
 - Set 'migration-limit' to '1' to enforce serial live migration (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-cluster-options).
 
 Migrate a single server by setting a location constraint against the node we want the VM off of.
--
+- If anything goes wrong, the server will enter a blocked state in pacemaker.
+- Recovery needs to be 'unmanage -> clean' to avoid a stop call.
+11:57 <@kgaillot> for your design, that sounds right. between cleanup and manage, i'd make sure there was a PE run without any pending actions blocked by the unmanaging -- you can either look at the logs on the DC, run "crm_simulate -SL", or just check the status for a bit
+11:58 <@kgaillot> you can play around with it by putting a higher preference on the to-be-cleaned node, to make sure it *does* move when you re-manage. that way you can see what logs/simulate/status look like
+
+12:07 <@kgaillot> i'm thinking if you do crm_resource --reprobe instead of cleanup in the above sequence, that should prevent anything unexpected
+12:07 <@kgaillot> unmanage -> adjust preferences if needed -> reprobe resource -> wait for probe results to come back in, and if status looks good -> re-manage
+12:08 <@kgaillot> the reprobe will wipe the entire resource history and fail counts for the resource, causing pacemaker to recheck the current status on all nodes. if the status then shows the resource running where you expect/want it, with no errors, then it's not going to do anything further
+12:09 <@kgaillot> (in 2.0, cleanup only erases the history where the resource has failed, while reprobe erases the history regardless)
+12:13 <@kgaillot> if there are no failures in the resource history, there should be no risk of a full stop. if there is no resource history at all, then after reprobe, there should be no risk of any actions (assuming you've set up location preferences and stickiness how you want them)
+
+Recover from a failed migration;
+
+reset location to prefer current host -> unmanage resource -> cleanup resource -> manage resource
+
+(running on node 2, so re-add the location constraint - basically, make sure the location constraint favours the current host)
+
+pcs resource unmanage srv01-c7
+pcs constraint remove location-srv01-c7-m3-a02n02.alteeve.com-50
+crm_resource --reprobe
+pcs resource manage srv01-c7
+
+
+====
+Unrelated Notes;
diff --git a/ocf/alteeve/server b/ocf/alteeve/server
index 37d2375d..d0836967 100755
--- a/ocf/alteeve/server
+++ b/ocf/alteeve/server
@@ -167,7 +167,10 @@ if ($conf->{switches}{test})
 to_log($conf, {message => "ocf:alteeve:server invoked.", 'line' => __LINE__, level => 2});
 
 # This is for debugging.
-if (($conf->{switches}{monitor}) or ($conf->{switches}{status}))
+if (($conf->{switches}{monitor}) or 
+    ($conf->{switches}{status}) or 
+    ($conf->{switches}{'meta-data'}) or 
+    ($conf->{switches}{metadata}))
 {
     show_environment($conf, 3);
 }
@@ -304,6 +307,37 @@ sub start_server
     my $server = $conf->{environment}{OCF_RESKEY_name};
     to_log($conf, {message => "We've been asked to start the server: [$server].", 'line' => __LINE__, level => 2});
     
+    
+    # If the server is already here, we'll do nothing else.
+    my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
+    if ($return_code)
+    {
+        # This shouldn't fail, but if it does, log it and keep going; the validation below may
+        # still let us start the server.
+        to_log($conf, {message => "It appears that the attempt to list the currently running servers returned a non-zero return code: [$return_code]. We will proceed as we may be able to fix this. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
+    }
+    foreach my $line (split/\n/, $output)
+    {
+        $line =~ s/^\s+//;
+        $line =~ s/\s+$//;
+        $line =~ s/\s+/ /g;
+        
+        if ($line =~ /^(\d+) $server (.*)$/)
+        {
+            my $state = $2;
+               $found = 1;
+            to_log($conf, {message => "server: [$server], state: [$state]", 'line' => __LINE__, level => 2});
+            
+            if ($state ne "shut down")
+            {
+                # Abort
+                to_log($conf, {message => "The server: [$server] is already on this node in the state: [$state], aborting the start request.", 'line' => __LINE__, level => 2});
+                exit(0);
+            }
+            last;
+        }
+    }
+    
+    # We need to boot, so validate everything.
     validate_all($conf);
     
     # If we're still alive, we're ready to boot.
@@ -313,7 +347,9 @@ sub start_server
     $definition_file =~ s/#!NAME!#/$server/;
     to_log($conf, {message => "definition_file: [$definition_file].", 'line' => __LINE__, level => 2});
     
-    my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." create $definition_file");
+    $return_code = undef;
+    $output      = undef;
+    ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." create $definition_file");
     if ($return_code)
     {
         # If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
@@ -662,23 +698,133 @@ sub migrate_server
     my $server = $conf->{environment}{OCF_RESKEY_name};
     my $source = $conf->{environment}{OCF_RESKEY_CRM_meta_migrate_source};
     my $target = $conf->{environment}{OCF_RESKEY_CRM_meta_migrate_target};
+    
+    # The actual migration command will involve enabling dual primary, then beginning the migration. The
+    # virsh call will depend on if we're pushing or pulling. Once the migration completes, regardless of
+    # success or failure, dual primary will be disabled again.
+    my $migration_command = "";
+    
     to_log($conf, {message => "server: [$server], source: [$source], target: [$target].", 'line' => __LINE__, level => 1});
     if ($conf->{switches}{migrate_to})
     {
         to_log($conf, {message => "We're pushing the: [$server] to: [$target].", 'line' => __LINE__, level => 1});
-        validate_all($conf);
+        
+        # Is the server even here?
+        my $found = 0;
+        my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
+        if ($return_code)
+        {
+            to_log($conf, {message => "It appears that the call to check if the server: [$server] is on this node returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
+            exit(1);
+        }
+        foreach my $line (split/\n/, $output)
+        {
+            $line =~ s/^\s+//;
+            $line =~ s/\s+$//;
+            $line =~ s/\s+/ /g;
+            
+            if ($line =~ /^(\d+) $server (.*)$/)
+            {
+                my $state = $2;
+                   $found = 1;
+                to_log($conf, {message => "server: [$server], state: [$state], found: [$found]", 'line' => __LINE__, level => 2});
+                
+                # We can only migrate if it is running.
+                if (lc($state) ne "running")
+                {
+                    to_log($conf, {message => "The server: [$server] state is: [$state]. A server must be 'running' in order to migrate it.", 'line' => __LINE__, level => 0, priority => "err"});
+                    exit(1);
+                }
+            }
+        }
+        if (not $found)
+        {
+            to_log($conf, {message => "The server: [$server] wasn't found on this machine.", 'line' => __LINE__, level => 0, priority => "err"});
+            exit(1);
+        }
+        
+        read_server_definition($conf);
+        validate_storage($conf);
+        
+        # If we're alive, craft the migration command.
+        $migration_command = $conf->{path}{exe}{virsh}." migrate --undefinesource --live ".$server." qemu+ssh://".$target."/system";
+        to_log($conf, {message => "migration_command: [$migration_command].", 'line' => __LINE__, level => 1});
     }
     elsif ($conf->{switches}{migrate_from})
     {
+        # Validate everything, as if we were about to boot.
         to_log($conf, {message => "We're pulling the: [$server] from: [$target].", 'line' => __LINE__, level => 1});
+        validate_all($conf);
+        
+        # If we're alive, craft the migration command.
+        $migration_command = $conf->{path}{exe}{virsh}." -c qemu+ssh://root\@".$source."/system migrate --undefinesource --live ".$server." qemu+ssh://".$target."/system";
+        to_log($conf, {message => "migration_command: [$migration_command].", 'line' => __LINE__, level => 1});
     }
-    else
+    
+    # Enable dual-primary. If this fails, we will disable (or try to) and then abort.
+    my $migrate = 1;
+    foreach my $resource (sort {$a cmp $b} keys %{$conf->{resource}})
     {
-        # WTF?
+        next if not defined $conf->{resource}{$resource}{target_node_id};
+        next if not $migrate;
+        my $shell_call = $conf->{path}{exe}{drbdsetup}." net-options ".$resource." ".$conf->{resource}{$resource}{target_node_id}." --allow-two-primaries=yes";
+        to_log($conf, {message => "shell_call: [$shell_call].", 'line' => __LINE__, level => 1});
+        
+        to_log($conf, {message => "Temporarily enabling dual primary for the resource: [$resource] to the node: [".$conf->{resource}{$resource}{target_name}." (".$conf->{resource}{$resource}{target_node_id}.")].", 'line' => __LINE__, level => 1});
+        my ($return_code, $output) = shell_call($conf, $shell_call);
+        if ($return_code)
+        {
+            # Something went wrong.
+            to_log($conf, {message => "The attempt to enable dual-primary for the resource: [$resource] to the node: [".$conf->{resource}{$resource}{target_name}." (".$conf->{resource}{$resource}{target_node_id}.")] returned a non-zero return code [$return_code]. The returned output (if any) was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
+            
+            # Disable migration (and any further attempts to enable dual-primary).
+            $migrate = 0;
+            to_log($conf, {message => "migrate: [$migrate].", 'line' => __LINE__, level => 1});
+        }
     }
-    # Return failed until this is actually implemented.
-    exit(1);
+    my $migrated = 0;
+    if ($migrate)
+    {
+        # Call the migration.
+        to_log($conf, {message => "The migration of: [$server] to the node: [$target] will now begin.", 'line' => __LINE__, level => 1});
+        to_log($conf, {message => "migration_command: [$migration_command].", 'line' => __LINE__, level => 1});
+        my ($return_code, $output) = shell_call($conf, $migration_command);
+        if ($return_code)
+        {
+            # Something went wrong.
+            to_log($conf, {message => "The attempt to migrate the server: [$server] to the node: [$target] returned a non-zero return code [$return_code]. The returned output (if any) was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
+            
+            $migrated = 0;
+            to_log($conf, {message => "migrated: [$migrated].", 'line' => __LINE__, level => 1});
+        }
+    }
+    
+    # Switch off dual-primary.
+    foreach my $resource (sort {$a cmp $b} keys %{$conf->{resource}})
+    {
+        next if not defined $conf->{resource}{$resource}{target_node_id};
+        my $shell_call = $conf->{path}{exe}{drbdsetup}." net-options ".$resource." ".$conf->{resource}{$resource}{target_node_id}." --allow-two-primaries=no";
+        to_log($conf, {message => "shell_call: [$shell_call].", 'line' => __LINE__, level => 1});
+        
+        to_log($conf, {message => "Re-disabling dual primary for the resource: [$resource] to the node: [".$conf->{resource}{$resource}{target_name}." (".$conf->{resource}{$resource}{target_node_id}.")].", 'line' => __LINE__, level => 1});
+        my ($return_code, $output) = shell_call($conf, $shell_call);
+        if ($return_code)
+        {
+            # Something went wrong.
+            to_log($conf, {message => "The attempt to re-disable dual-primary for the resource: [$resource] to the node: [".$conf->{resource}{$resource}{target_name}." (".$conf->{resource}{$resource}{target_node_id}.")] returned a non-zero return code [$return_code]. The returned output (if any) was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
+        }
+    }
+    
+    # Did something go wrong during the dual-primary enable or the actual migration call?
+    if ((not $migrate) or (not $migrated))
+    {
+        # Exit
+        exit(1);
+    }
+    
+    # If we made it here, we succeeded.
+    exit(0);
 }
 
 # Validation checks that we have the definition XML, resource config and that needed apps are installed.
@@ -700,7 +846,7 @@ sub validate_all
     to_log($conf, {message => "- Eumlator is valid.", 'line' => __LINE__, level => 2});
     
     # These tests are only needed if we're about to boot the server
-    if ($conf->{switches}{start})
+    if (($conf->{switches}{start}) or ($conf->{switches}{migrate_from}))
     {
         # Check that we have enough RAM.
         validate_ram($conf);
@@ -808,8 +954,11 @@ sub validate_storage
         }
     }
     
-    # Verify optical disks now
-    validate_storage_optical($conf);
+    # Verify optical disks now, unless we're migrating a server off of us.
+    if (not $conf->{switches}{migrate_to})
+    {
+        validate_storage_optical($conf);
+    }
     
     # Verify DRBD devices now
     validate_storage_drbd($conf);
@@ -1054,6 +1203,15 @@ sub validate_storage_drbd
         }
     }
     
+    # If I am about to push a server off, we need to make sure the peer is UpToDate.
+    if ($conf->{switches}{migrate_to})
+    {
+        to_log($conf, {message => "Checking that the peer's DRBD resources are Connected and UpToDate prior to migration.", 'line' => __LINE__, level => 2});
+        foreach my $device_path (sort {$a cmp $b} keys %{$conf->{server}{disks}})
+        {
+        }
+    }
+    
     return(0);
 }
 
@@ -1076,29 +1234,31 @@ sub check_drbd_status
        }
        
        ### This disk is in use by this server, check it.
        to_log($conf, {message => "The local replicated disk: [$device_path] is used by this server. Checking it out now.", 'line' => __LINE__, level => 2});
        
-       # First, are any of the local volumes not UpToDate?
-       foreach my $device_ref (@{$resource_ref->{devices}})
+       # If we're booting a server or migrating it here, we need to make sure all local
+       # volumes are UpToDate.
+       if (($conf->{switches}{start}) or ($conf->{switches}{migrate_from}))
        {
-           # Are we UpToDate (or SyncSource)?
-           if ((lc($device_ref->{'disk-state'}) ne "uptodate") && (lc($device_ref->{'disk-state'}) ne "syncsource"))
+           foreach my $device_ref (@{$resource_ref->{devices}})
            {
-               # If we've been asked to start, refuse.
-               if ($conf->{switches}{start})
+               # Are we UpToDate (or SyncSource)?
+               if ((lc($device_ref->{'disk-state'}) ne "uptodate") && (lc($device_ref->{'disk-state'}) ne "syncsource"))
                {
+                   # We can't start here.
                    to_log($conf, {message => "The DRBD resource: [$resource] volume: [".$device_ref->{volume}."] locat disk state is: [".$device_ref->{'disk-state'}."]. Unsafe to boot the server unless the disk state is UpToDate.", 'line' => __LINE__, level => 0, priority => "err"});
                    exit(1);
                }
-           }
-           else
-           {
-               to_log($conf, {message => "The DRBD resource: [$resource] volume: [".$device_ref->{volume}."] locat disk state is: [".$device_ref->{'disk-state'}."], good.", 'line' => __LINE__, level => 2});
+               else
+               {
+                   to_log($conf, {message => "The DRBD resource: [$resource] volume: [".$device_ref->{volume}."] locat disk state is: [".$device_ref->{'disk-state'}."], good.", 'line' => __LINE__, level => 2});
+               }
            }
        }
       
-       # Is this a connection we care about?
+       # If we're booting a server, we need to be sure that *no* peer is Primary. If we're
+       # migrating, we need to be sure the migration target is UpToDate.
        foreach my $connection_ref (@{$resource_ref->{connections}})
        {
-           # Is the peer's role Primary?
+           # Is the peer's role Primary? In all cases, we abort if so.
            to_log($conf, {message => "Checking connection to: [".$connection_ref->{name}."].", 'line' => __LINE__, level => 2});
            if (lc($connection_ref->{'peer-role'}) eq "primary")
            {
@@ -1109,6 +1269,41 @@
                exit(1);
            }
+           
+           # If we're migrating to the peer, make sure the target disk state is UpToDate
+           # or SyncSource.
+           if (($conf->{switches}{migrate_to}) or ($conf->{switches}{migrate_from}))
+           {
+               # Is this connection to our migration target?
+               my $peer_short_name  = $connection_ref->{name};
+                  $peer_short_name  =~ s/\..*$//;
+               my $migration_target = $conf->{environment}{OCF_RESKEY_CRM_meta_migrate_target};
+                  $migration_target =~ s/\..*$//;
+               to_log($conf, {message => "peer_short_name: [$peer_short_name], migration_target: [$migration_target].", 'line' => __LINE__, level => 2});
+               if ($peer_short_name ne $migration_target)
+               {
+                   # Ignore this, it isn't our target
+                   to_log($conf, {message => "Ignoring the connection to: [$peer_short_name], it isn't the migration target.", 'line' => __LINE__, level => 2});
+                   next;
+               }
+               
+               # We will need the node ID to enable dual-primary.
+               #print Dumper $connection_ref;
+               $conf->{resource}{$resource}{target_name}    = $connection_ref->{name};
+               $conf->{resource}{$resource}{target_node_id} = $connection_ref->{'peer-node-id'};
+               to_log($conf, {message => "resource::${resource}::target_name: [".$conf->{resource}{$resource}{target_name}."], resource::${resource}::target_node_id: [".$conf->{resource}{$resource}{target_node_id}."].", 'line' => __LINE__, level => 2});
+               
+               # If we're still alive, we want to ensure all volumes are UpToDate.
+               foreach my $volume_ref (@{$connection_ref->{peer_devices}})
+               {
+                   to_log($conf, {message => "volume: [".$volume_ref->{volume}."], disk_state: [".$volume_ref->{'peer-disk-state'}."].", 'line' => __LINE__, level => 2});
+                   if ((lc($volume_ref->{'peer-disk-state'}) ne "uptodate") && (lc($volume_ref->{'peer-disk-state'}) ne "syncsource"))
+                   {
+                       to_log($conf, {message => "The DRBD resource: [$resource] on the peer: [".$connection_ref->{name}."] is not UpToDate (or SyncSource). Refusing to migrate.", 'line' => __LINE__, level => 0, priority => "err"});
+                       exit(1);
+                   }
+               }
+           }
        }
    }
    
    # If we're here, it's OK.
diff --git a/tools/fence_pacemaker b/tools/fence_pacemaker
index c33138c1..af70c447 100755
--- a/tools/fence_pacemaker
+++ b/tools/fence_pacemaker
@@ -33,9 +33,8 @@
 # =========
 #
 # This program uses;
-# - 1 = Something failed
+# - 1 = Something failed or end of program hit (should not happen).
 # - 7 = Fence succeeded
-# - 255 = End of program hit... should never happen.
 #
 # TODO:
 # - Read the CIB; 'pcs status xml' or '/usr/sbin/cibadmin --local --query' ?
@@ -156,7 +155,7 @@ to_log($conf, {message => "Ready to fence: [".$conf->{cluster}{target_node}."]"
 
 kill_target($conf);
 
 # If we hit here, something very wrong happened.
-exit(255);
+exit(1);
 
 #############################################################################################################
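
====
Editor's addendum (an illustrative sketch, not part of the patch above);

The migrate_to path added to ocf/alteeve/server automates one sequence per DRBD resource backing the server: temporarily allow dual-primary on the connection to the migration target, run the live migration with virsh, then drop back to single-primary whether or not the migration worked. The shell sketch below walks through that same sequence by hand. It is a sketch only; the resource name, peer node ID, server name and target host are the example values used in the notes above and would differ on a real Anvil! pair.

#!/bin/bash
# Sketch of the push-migration flow that ocf:alteeve:server automates.
# All four values are assumptions borrowed from the examples in the notes.
resource="srv01-c7_0"              # DRBD resource backing the server
peer_id="2"                        # DRBD node-id of the migration target
server="srv01-c7"                  # libvirt domain name
target="m3-a02n01.alteeve.com"     # host the server is being pushed to

# 1. Temporarily allow dual-primary on the connection to the target.
drbdsetup net-options "${resource}" "${peer_id}" --allow-two-primaries=yes || exit 1

# 2. Live-migrate the server. '--undefinesource' removes the transient
#    definition from this node once the guest is running on the peer.
virsh migrate --undefinesource --live "${server}" "qemu+ssh://${target}/system"
rc=$?

# 3. Always drop back to single-primary, even if the migration failed.
drbdsetup net-options "${resource}" "${peer_id}" --allow-two-primaries=no

exit ${rc}

If the migration fails and the resource ends up blocked in pacemaker, the recovery sequence recorded in the notes above (unmanage the resource, fix the location constraint, crm_resource --reprobe, then manage it again) applies.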