diff --git a/Anvil/Tools/DRBD.pm b/Anvil/Tools/DRBD.pm index 8579a150..a8127fdc 100755 --- a/Anvil/Tools/DRBD.pm +++ b/Anvil/Tools/DRBD.pm @@ -12,9 +12,11 @@ our $VERSION = "3.0.0"; my $THIS_FILE = "DRBD.pm"; ### Methods; +# allow_two_primaries # get_devices # get_status # manage_resource +# reload_defaults =pod @@ -75,6 +77,147 @@ sub parent # Public methods # ############################################################################################################# +=head2 allow_two_primaries + +This enables dual-primary for the given resource. This is meant to be called prior to a live migration, and should be disabled again as soon as possible via C<< DRBD->reload_defaults >>. + +Parameters; + +=head3 password (optional) + +This is the password to use when connecting to a remote machine. If not set, but C<< target >> is, an attempt to connect without a password will be made. + +=head3 port (optional) + +This is the TCP port to use when connecting to a remote machine. If not set, but C<< target >> is, C<< 22 >> will be used. + +=head3 remote_user (optional, default 'root') + +If C<< target >> is set, this will be the user we connect to the remote machine as. + +=head3 resource (required) + +This is the name of the resource to enable two primaries on. + +=head3 target (optional) + +This is the IP or host name of the machine to read the version of. If this is not set, the local system's version is checked. + +=head3 target_node_id (optional, but see condition below) + +This is the DRBD target node's (connection) ID that we're enabling dual-primary with. If this is not passed, but C<< drbd::status::::resource::::connection::::peer-node-id >> is set, it will be used. Otherwise this argument is required. + +=cut +sub allow_two_primaries +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + + my $password = defined $parameter->{password} ? 
$parameter->{password} : ""; + my $port = defined $parameter->{port} ? $parameter->{port} : ""; + my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root"; + my $resource = defined $parameter->{resource} ? $parameter->{resource} : ""; + my $target = defined $parameter->{target} ? $parameter->{target} : "local"; + my $target_node_id = defined $parameter->{target_node_id} ? $parameter->{target_node_id} : ""; + my $return_code = 255; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + password => $anvil->Log->secure ? $password : $anvil->Words->string({key => "log_0186"}), + port => $port, + remote_user => $remote_user, + resource => $resource, + target => $target, + target_node_id => $target_node_id, + }}); + + if (not $resource) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "DRBD->allow_two_primaries()", parameter => "resource" }}); + return($return_code); + } + + # Do we need to scan devices? + my $host = $anvil->_short_hostname; + if (not $anvil->data->{drbd}{config}{$host}{peer}) + { + # Get our device list. + $anvil->DRBD->get_devices({ + debug => $debug, + password => $password, + port => $port, + remote_user => $remote_user, + target => $target, + }); + } + + my $peer_name = $anvil->data->{drbd}{config}{$host}{peer}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { peer_name => $peer_name }}); + if ($target_node_id !~ /^\d+$/) + { + # Can we find it? 
+ if ($anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-node-id'} =~ /^\d+$/) + { + $target_node_id = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-node-id'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { target_node_id => $target_node_id }}); + } + else + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "DRBD->allow_two_primaries()", parameter => "target_node_id" }}); + return($return_code); + } + } + + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0350", variables => { + resource => $resource, + target_name => $peer_name, + target_node_id => $target_node_id, + }}); + + my $shell_call = $anvil->data->{path}{exe}{drbdsetup}." net-options ".$resource." ".$target_node_id." --allow-two-primaries=yes"; + my $output = ""; + if (($target) && ($target ne "local") && ($target ne $anvil->_hostname) && ($target ne $anvil->_short_hostname)) + { + # Remote call. + ($output, my $error, $return_code) = $anvil->Remote->call({ + debug => $debug, + shell_call => $shell_call, + target => $target, + port => $port, + password => $password, + remote_user => $remote_user, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + else + { + # Local. + ($output, $return_code) = $anvil->System->call({ + debug => $debug, + shell_call => $shell_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + } + + if ($return_code) + { + # Something went wrong. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0356", variables => { + return_code => $return_code, + output => $output, + }}); + } + + return($return_code); +} + =head2 get_devices This finds all of the configured '/dev/drbdX' devices and maps them to their resource names. @@ -694,6 +837,104 @@ sub manage_resource return($return_code); } +=head2 reload_defaults + +This switches DRBD back to running using the values in the config files. Specifically, it calls C<< drbdadm adjust >> on the given resource. + +The return code from the C<< drbdadm >> call is returned by this method. If C<< resource >> is not passed, C<< 255 >> is returned without calling C<< drbdadm >>. + +Parameters; + +=head3 password (optional) + +This is the password to use when connecting to a remote machine. If not set, but C<< target >> is, an attempt to connect without a password will be made. + +=head3 port (optional) + +This is the TCP port to use when connecting to a remote machine. If not set, but C<< target >> is, C<< 22 >> will be used. + +=head3 remote_user (optional, default 'root') + +If C<< target >> is set, this will be the user we connect to the remote machine as. + +=head3 resource (required) + +This is the name of the resource to reload the default configuration for (ie: disable dual primary, pickup changes from the config file, etc). + +=head3 target (optional) + +This is the IP or host name of the machine to run the C<< drbdadm >> call on. If this is not set (or it is C<< local >> or this machine's host name), the call is run on the local system. + +=cut +sub reload_defaults +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + + my $password = defined $parameter->{password} ? $parameter->{password} : ""; + my $port = defined $parameter->{port} ? $parameter->{port} : ""; + my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root"; + my $resource = defined $parameter->{resource} ? 
$parameter->{resource} : ""; + my $target = defined $parameter->{target} ? $parameter->{target} : "local"; + my $return_code = 255; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + password => $anvil->Log->secure ? $password : $anvil->Words->string({key => "log_0186"}), + port => $port, + remote_user => $remote_user, + resource => $resource, + target => $target, + }}); + + if (not $resource) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "DRBD->reload_defaults()", parameter => "resource" }}); + return($return_code); + } + + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0355"}); + my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource; + my $output = ""; + if (($target) && ($target ne "local") && ($target ne $anvil->_hostname) && ($target ne $anvil->_short_hostname)) + { + # Remote call; the adjust is run on the target machine. + ($output, my $error, $return_code) = $anvil->Remote->call({ + debug => $debug, + shell_call => $shell_call, + target => $target, + port => $port, + password => $password, + remote_user => $remote_user, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + else + { + # Local call; pass our debug level along, as the remote branch does. + ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + } + + if ($return_code) + { + # Something went wrong. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0356", variables => { + return_code => $return_code, + output => $output, + }}); + } + + return($return_code); +} + # =head3 # # Private Functions; diff --git a/Anvil/Tools/Remote.pm b/Anvil/Tools/Remote.pm index 1e4ccfcc..d2516365 100644 --- a/Anvil/Tools/Remote.pm +++ b/Anvil/Tools/Remote.pm @@ -17,6 +17,7 @@ my $THIS_FILE = "Remote.pm"; ### Methods; # add_target_to_known_hosts # call +# test_access # _call_ssh_keyscan # _check_known_hosts_for_target @@ -118,7 +119,7 @@ sub add_target_to_known_hosts my $delete_if_found = defined $parameter->{delete_if_found} ? $parameter->{delete_if_found} : 0; my $port = defined $parameter->{port} ? $parameter->{port} : 22; my $target = defined $parameter->{target} ? $parameter->{target} : ""; - my $user = defined $parameter->{user} ? $parameter->{user} : $<; + my $user = defined $parameter->{user} ? $parameter->{user} : getpwuid($<); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { delete_if_found => $delete_if_found, port => $port, @@ -288,12 +289,12 @@ sub call $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "cache::ssh_fh::${ssh_fh_key}" => $anvil->data->{cache}{ssh_fh}{$ssh_fh_key} }}); # Now pick up the rest of the variables. - my $close = defined $parameter->{'close'} ? $parameter->{'close'} : 0; - my $no_cache = defined $parameter->{no_cache} ? $parameter->{no_cache} : 0; - my $password = defined $parameter->{password} ? $parameter->{password} : $anvil->data->{sys}{root_password}; - my $secure = defined $parameter->{secure} ? $parameter->{secure} : 0; - my $shell_call = defined $parameter->{shell_call} ? $parameter->{shell_call} : ""; - my $timeout = defined $parameter->{timeout} ? $parameter->{timeout} : 10; + my $close = defined $parameter->{'close'} ? 
$parameter->{'close'} : 0; + my $no_cache = defined $parameter->{no_cache} ? $parameter->{no_cache} : 0; + my $password = defined $parameter->{password} ? $parameter->{password} : $anvil->data->{sys}{root_password}; + my $secure = defined $parameter->{secure} ? $parameter->{secure} : 0; + my $shell_call = defined $parameter->{shell_call} ? $parameter->{shell_call} : ""; + my $timeout = defined $parameter->{timeout} ? $parameter->{timeout} : 10; my $start_time = time; my $ssh_fh = $anvil->data->{cache}{ssh_fh}{$ssh_fh_key}; # NOTE: The shell call might contain sensitive data, so we show '--' if 'secure' is set and $anvil->Log->secure is not. @@ -472,11 +473,11 @@ sub call $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { i => $i, message_key => $message_key }}); # Make sure we know the fingerprint of the remote machine - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, key => "log_0158", variables => { target => $target, user => $< }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, key => "log_0158", variables => { target => $target, user => scalar(getpwuid($<)) }}); $anvil->Remote->add_target_to_known_hosts({ debug => $debug, target => $target, - user => $<, + user => scalar(getpwuid($<)), }); } elsif ($connect_output =~ /Connection refused/i) @@ -635,6 +636,75 @@ sub call return($output, $error, $return_code); } +=head2 test_access + +This attempts to log into the target to verify that the target is up and reachable. It returns C<< 1 >> on access, C<< 0 >> otherwise. + + my $access = $anvil->Remote->test_access({target => $target}); + +Parameters; + +=head3 password (optional) + +This is the password used to connect to the remote target as the given user. + +B<< Note >>: Passwordless SSH is supported. If you can ssh to the target as the given user without a password, then no password needs to be given here. 
+ +=head3 port (optional, default '22') + +This is the TCP port to use when connecting to the C<< target >> over SSH. + +=head3 target (required) + +This is the IP address or (resolvable) host name of the machine whose access we're testing. + +=head3 user (optional, defaults to user running this method) + +This is the user we try to log into the target as. + +=cut +sub test_access +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + + my $password = defined $parameter->{password} ? $parameter->{password} : $anvil->data->{sys}{root_password}; + my $port = defined $parameter->{port} ? $parameter->{port} : 22; + my $target = defined $parameter->{target} ? $parameter->{target} : ""; + my $user = defined $parameter->{user} ? $parameter->{user} : getpwuid($<); + my $access = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { + password => $anvil->Log->secure ? $password : $anvil->Words->string({key => "log_0186"}), + port => $port, + target => $target, + user => $user, + }}); + + # Call the target on the requested port; any output from the echo is treated as proof of access. + my ($output, $error, $return_code) = $anvil->Remote->call({ + debug => $debug, + password => $password, + shell_call => $anvil->data->{path}{exe}{echo}." 1", + target => $target, port => $port, + remote_user => $user, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + + if ($output) + { + $access = 1; + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { access => $access }}); + return($access); +} + # =head3 # # Private Functions; @@ -680,7 +750,7 @@ sub _call_ssh_keyscan my $known_hosts = defined $parameter->{known_hosts} ? $parameter->{known_hosts} : ""; my $port = defined $parameter->{port} ? $parameter->{port} : ""; my $target = defined $parameter->{target} ? 
$parameter->{target} : ""; - my $user = defined $parameter->{user} ? $parameter->{user} : $<; + my $user = defined $parameter->{user} ? $parameter->{user} : getpwuid($<); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { known_hosts => $known_hosts, port => $port, @@ -769,7 +839,7 @@ sub _check_known_hosts_for_target my $known_hosts = defined $parameter->{known_hosts} ? $parameter->{known_hosts} : ""; my $port = defined $parameter->{port} ? $parameter->{port} : ""; my $target = defined $parameter->{target} ? $parameter->{target} : ""; - my $user = defined $parameter->{user} ? $parameter->{user} : $<; + my $user = defined $parameter->{user} ? $parameter->{user} : getpwuid($<); my $known_machine = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { delete_if_found => $delete_if_found, diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index 171f7c8d..927a7052 100755 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -15,6 +15,7 @@ my $THIS_FILE = "Server.pm"; # boot # find # get_status +# migrate # shutdown =pod @@ -227,6 +228,7 @@ sub find # Remote call. ($host, my $error, my $host_return_code) = $anvil->Remote->call({ debug => 2, + password => $password, shell_call => $anvil->data->{path}{exe}{hostnamectl}." --static", target => $target, remote_user => "root", @@ -238,6 +240,7 @@ sub find }}); ($virsh_output, $error, $return_code) = $anvil->Remote->call({ debug => 2, + password => $password, shell_call => $anvil->data->{path}{exe}{virsh}." list --all", target => $target, remote_user => "root", @@ -306,7 +309,6 @@ This is the name of the server we're gathering data on. This is the IP or host name of the machine to read the version of. If this is not set, the local system's version is checked. 
=cut -# NOTE: the version is set in anvil.spec by sed'ing the release and arch onto anvil.version in anvil-core's %post sub get_status { my $self = shift; @@ -636,6 +638,102 @@ sub shutdown return($success); } +=head2 migrate + +This will migrate (push or pull) a server from one node to another. If the migration was successful, C<< 1 >> is returned. Otherwise, C<< 0 >> is returned with a (hopefully) useful error being logged. + +NOTE: It is assumed that sanity checks are completed before this method is called. + +Parameters; + +=head3 server (required) + +This is the name of the server being migrated. + +=head3 source (optional) + +This is the host name (or IP) of the host that we're pulling the server from. + +If set, the server will be pulled. + +=head3 target (optional, default is the full local hostname) + +This is the host name (or IP) of the host that the server will be pushed to, if C<< source >> is not set. When this is not passed, the local full host name is used as default. + +=cut +sub migrate +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + + my $server = defined $parameter->{server} ? $parameter->{server} : ""; + my $source = defined $parameter->{source} ? $parameter->{source} : ""; + my $target = defined $parameter->{target} ? $parameter->{target} : $anvil->_hostname; + my $success = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + server => $server, + source => $source, + target => $target, + }}); + + if (not $server) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->migrate()", parameter => "server" }}); + return($success); + } + + # Enable dual-primary for any resources we know about for this server. 
+ foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$server}{resource}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource => $resource }}); + my ($return_code) = $anvil->DRBD->allow_two_primaries({ + debug => $debug, + resource => $resource, + }); + } + + my $migration_command = $anvil->data->{path}{exe}{virsh}." migrate --undefinesource --tunnelled --p2p --live ".$server." qemu+ssh://".$target."/system"; + if ($source) + { + $migration_command = $anvil->data->{path}{exe}{virsh}." -c qemu+ssh://root\@".$source."/system migrate --undefinesource --tunnelled --p2p --live ".$server." qemu+ssh://".$target."/system"; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { migration_command => $migration_command }}); + + # Call the migration now + my ($output, $return_code) = $anvil->System->call({shell_call => $migration_command}); + if ($return_code) + { + # Something went wrong. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0353", variables => { + server => $server, + target => $target, + return_code => $return_code, + output => $output, + }}); + } + else + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0354"}); + + $success = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); + } + + # Switch off dual-primary. 
+ foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$server}{resource}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource => $resource }}); + $anvil->DRBD->reload_defaults({ + debug => $debug, + resource => $resource, + }); + } + + return($success); +} + # =head3 # # Private Functions; @@ -1028,6 +1126,8 @@ sub _parse_definition driver_type => $driver_type, }}); + ### NOTE: Live migration won't work unless the '/dev/drbdX' devices are block. If they come + ### up as 'file', virsh will refuse to migrate with a lack of shared storage error. # A device path can come from 'dev' or 'file'. my $device_path = ""; if (defined $hash_ref->{source}->[0]->{dev}) diff --git a/notes b/notes index 81a4bb7e..1d0a7f10 100644 --- a/notes +++ b/notes @@ -1096,3 +1096,22 @@ sleep 30 pcs stonith create fence-virsh fence_virsh ipaddr=192.168.100.1 login=root passwd=christine pcmk_host_map="amy:rhel8-1;anna:rhel8-2;clara:rhel8-3;fanny:rhel8-4" + +if [ ! 
-e '/dev/an-a01n01_vg0/srv09-psql_0' ]; +then + /sbin/lvcreate -L 69GiB -n srv09-psql_0 an-a01n01_vg0 +fi +virt-install --connect qemu:///system \ + --name srv09-psql \ + --ram 4096 \ + --arch x86_64 \ + --vcpus 2 \ + --cpu Nehalem,+fsgsbase \ + --cdrom '/shared/files/Win2016_Server_64-bit_English.iso' \ + --boot menu=on \ + --disk path='/shared/files/virtio-win.iso',device=cdrom --force\ + --os-variant win2k8 \ + --network bridge=ifn_bridge1,model=virtio \ + --disk path=/dev/an-a01n01_vg0/srv09-psql_0,bus=virtio,cache=writethrough \ + --graphics spice \ + --noautoconsole --wait -1 > /var/log/anvil-server_srv09-psql.log & diff --git a/ocf/alteeve/server b/ocf/alteeve/server index 0c417dce..d6a96ecf 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -146,7 +146,7 @@ if ($anvil->data->{switches}{test1}) $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "el8-a01n01.digimer.ca"; $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = "el8-a01n01.digimer.ca"; $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = "el8-a01n02.digimer.ca"; - #print "Running test 1; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n"; + print "Running test 1; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n"; } if ($anvil->data->{switches}{test2}) { @@ -155,19 +155,13 @@ if ($anvil->data->{switches}{test2}) $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "el8-a01n02.digimer.ca"; $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = "el8-a01n02.digimer.ca"; $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = "el8-a01n01.digimer.ca"; - #print "Running test 2; Migrate: 
[".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n"; + print "Running test 2; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n"; } if ($anvil->data->{switches}{test3}) { $anvil->data->{environment}{OCF_RESKEY_name} = "test_server"; $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "el8-a01n01.digimer.ca"; - #print "Running test 3; Boot: [".$anvil->data->{environment}{OCF_RESKEY_name}."] locally.\n"; -} -if ($anvil->data->{switches}{test4}) -{ - $anvil->data->{environment}{OCF_RESKEY_name} = "test_server"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "el8-a01n01.digimer.ca"; - #print "Running test 3; Shut down: [".$anvil->data->{environment}{OCF_RESKEY_name}."] locally.\n"; + #print "Running test 3; Boot or shutdown of: [".$anvil->data->{environment}{OCF_RESKEY_name}."].\n"; } # This is for debugging. @@ -614,6 +608,9 @@ sub migrate_server { my ($anvil) = @_; + ### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a + ### user might want to do this (ie: sync will be done soon and the need to evacuate the node + ### ASAP is high). Maybe we'll enforce this and require a '--force' switch later? # If we were given 'migrate_to', we need to make sure the storage is UpToDate on the peer for all # backing resources. We can't check the target's bridges, but the migation will fail if one is # missing. 
@@ -623,11 +620,173 @@ sub migrate_server my $source = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}; my $target = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - server => $server, - source => $source, - target => $target, + server => $server, + source => $source, + target => $target, + 'switches::migrate_to' => $anvil->data->{switches}{migrate_to}, + 'switches::migrate_from' => $anvil->data->{switches}{migrate_from}, }}); + # Get a view of the servers locally and our peer. + validate_all($anvil); + + # The actual migration command will involve enabling dual primary, then beginning the migration. The + # virsh call will depend on if we're pushing or pulling. Once the migration completes, regardless of + # success or failure, dual primary will be disabled again. + my $migration_command = ""; + my $migrated = 0; + if ($anvil->data->{switches}{migrate_to}) + { + # Can I even connect to the target? + my ($access) = $anvil->Remote->test_access({debug => 2, target => $target}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }}); + if (not $access) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0429", variables => { + server => $server, + target => $target, + }}); + ### TODO: I wonder if this should be exit'ed with '6'? + $anvil->nice_exit({exit_code => 5}); + } + + # Find the server + $anvil->Server->find({debug => 3}); + + my $server_host = defined $anvil->data->{server}{location}{$server}{host} ? $anvil->data->{server}{location}{$server}{host} : ""; + my $server_status = defined $anvil->data->{server}{location}{$server}{status} ? 
$anvil->data->{server}{location}{$server}{status} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_host => $server_host, + server_status => $server_status, + }}); + + if (not $server_host) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0344", variables => { server => $server }}); + $anvil->nice_exit({exit_code => 1}); + } + + # Get the DRBD status. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0387"}); + $anvil->DRBD->get_status({debug => 2}); + + # Make sure all resource(s) are ready for the server. + my $all_up_to_date = 1; + my $host = $anvil->_short_hostname; + my $peer_name = $anvil->data->{drbd}{config}{$host}{peer}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + host => $host, + peer_name => $peer_name, + }}); + foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$server}{resource}}) + { + my $connection_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'connection-state'}; + my $peer_node_id = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-node-id'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peer_node_id => $peer_node_id, + resource => $resource, + connection_state => $connection_state, + }}); + if (lc($connection_state) ne "connected") + { + # Try to bring the resource up on the peer now. + $anvil->DRBD->manage_resource({ + resource => $resource, + task => "up", + target => $target, + }); + + # We'll give it 20 seconds. 
+ my $wait = 20; + while($wait) + { + $anvil->DRBD->get_status({debug => 2}); + $connection_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'connection-state'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + resource => $resource, + connection_state => $connection_state, + }}); + + if (lc($connection_state) eq "connected") + { + # It's up! Clear the counter so we leave the wait loop. + $wait = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'wait' => $wait }}); + } + else + { + $wait--; sleep 1 if $wait; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'wait' => $wait }}); + + if (not $wait) + { + # We're done waiting; the peer never connected, so give up on the migration. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0428", variables => { + server => $server, + target => $target, + resource => $resource, + connection_state => $connection_state, + }}); + ### TODO: I wonder if this should be exit'ed with '6'? 
+ $anvil->nice_exit({exit_code => 5}); + } + } + } + + } + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{volume}}) + { + my $peer_disk_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{volume}{$volume}{'peer-disk-state'}; + my $percent_in_sync = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{volume}{$volume}{'percent-in-sync'}; + my $replication_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{volume}{$volume}{'replication-state'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + volume => $volume, + peer_disk_state => $peer_disk_state, + percent_in_sync => $percent_in_sync, + replication_state => $replication_state, + }}); + + if (lc($peer_disk_state) ne "uptodate") + { + $all_up_to_date = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_up_to_date => $all_up_to_date }}); + } + } + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_up_to_date => $all_up_to_date }}); + if (not $all_up_to_date) + { + ### TODO: If we decide later to block migration to Inconsistent peers, here's where we'd do it. + } + + # If we're still alive, we're ready to migrate. + ($migrated) = $anvil->Server->migrate({ + debug => 2, + server => $server, + target => $target + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrated => $migrated }}); + } + elsif ($anvil->data->{switches}{migrate_from}) + { + $anvil->Server->find({debug => 3, target => $target}); + + my $host = defined $anvil->data->{server}{location}{$server}{host} ? $anvil->data->{server}{location}{$server}{host} : ""; + my $status = defined $anvil->data->{server}{location}{$server}{status} ? 
$anvil->data->{server}{location}{$server}{status} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + host => $host, + status => $status, + }}); + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrated => $migrated }}); + if (not $migrated) + { + # Exit + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0357"}); + $anvil->nice_exit({exit_code => 1}); + } # If we made it here, we succeeded. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0360"}); @@ -644,7 +803,7 @@ sub validate_all ### if the server is running elsewhere. # Read in an parse the server's XML. - $anvil->System->check_storage({debug => 2}); + $anvil->System->check_storage({debug => 3}); $anvil->Server->get_status({debug => 2, server => $anvil->data->{environment}{OCF_RESKEY_name}}); # Is the name in the definition file what we expect (and did we read the XML data at all)? @@ -675,7 +834,7 @@ sub validate_bridges my ($anvil) = @_; # Get my bridge list - $anvil->System->get_bridges({debug => 2}); + $anvil->System->get_bridges({debug => 3}); # Find the Optical drives and DRBD devices. 
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; @@ -724,7 +883,7 @@ sub validate_storage { $source = "from_memory"; } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { server => $server, source => $source, }}); @@ -784,7 +943,7 @@ sub validate_storage_drbd foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{$server}{device}}) { next if not $device_path; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "server::${server}::device::${device_path}::resource" => $anvil->data->{server}{$server}{device}{$device_path}{resource}, }}); if (not $anvil->data->{server}{$server}{device}{$device_path}{resource}) @@ -799,7 +958,7 @@ sub validate_storage_drbd my $drbd_device = $anvil->data->{server}{$server}{$source}{device}{disk}{target}{$device_target}{path}; my $drbd_resource = $anvil->data->{drbd}{config}{$host}{drbd_path}{$drbd_device}{resource}; my $on_lv = $anvil->data->{drbd}{config}{$host}{drbd_path}{$drbd_device}{on}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host => $host, drbd_device => $drbd_device, drbd_resource => $drbd_resource, @@ -843,26 +1002,6 @@ sub validate_storage_drbd return(0); } -# This processes the DRBD setup JSON data -sub check_drbd_status -{ - my ($anvil, $status_json) = @_; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status_json => $status_json }}); - - - return(0); -} - -# This makes sure that any media in the server's optical drive exists here and is readable. 
-sub validate_storage_optical -{ - my ($anvil) = @_; - - - - return(0); -} - # This verifies that the requested emulator exists and can be used. sub validate_emulator { @@ -871,7 +1010,7 @@ sub validate_emulator # What emulator is this using? my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $emulator = $anvil->data->{server}{$server}{from_disk}{info}{emulator}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { emulator => $emulator, "server::${server}::from_disk::info::emulator" => $anvil->data->{server}{$server}{from_disk}{info}{emulator} }}); @@ -901,7 +1040,7 @@ sub validate_name my ($anvil) = @_; my $server = $anvil->data->{environment}{OCF_RESKEY_name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { server => $server, "server::${server}::from_disk::info::name" => $anvil->data->{server}{$server}{from_disk}{info}{name}, }}); @@ -938,8 +1077,8 @@ sub validate_ram # How mcuh RAM does the server need and how much do we have free? my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server_ram_bytes = $anvil->data->{server}{$server}{from_disk}{memory}; - my $available = $anvil->System->get_free_memory({debug => 2}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + my $available = $anvil->System->get_free_memory({debug => 3}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { server_ram_bytes => $anvil->Convert->add_commas({number => $server_ram_bytes})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}).")", available => $anvil->Convert->add_commas({number => $available})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $available}).")", }}); diff --git a/share/words.xml b/share/words.xml index 34fe6592..229e140e 100644 --- a/share/words.xml +++ b/share/words.xml @@ -666,7 +666,7 @@ Output of: [#!variable!command!#] was; The attempt to enable dual-primary for the resource: [#!variable!resource!#] to the node: [#!variable!target_name!# (#!variable!target_node_id!#)] returned a non-zero return code [#!variable!return_code!#]. The returned output (if any) was: [#!variable!output!#]. The migration of: [#!variable!server!#] to the node: [#!variable!target!#] will now begin. The attempt to migrate the server: [#!variable!server!#] to the node: [#!variable!target!#] returned a non-zero return code [#!variable!return_code!#]. The returned output (if any) was: [#!variable!output!#]. - It looks like the migration was successful. Will verify in a moment. + It looks like the migration was successful. Re-disabling dual primary by restoring config file settings. The attempt to reset DRBD to config file settings returned a non-zero return code: [#!variable!return_code!#]. The output, if any, was: [#!variable!output!#]. Failure, exiting with '1'. @@ -745,6 +745,8 @@ Failed to promote the DRBD resource: [#!variable!resource!#] primary. Expected a The server: [#!variable!server!#] will now be gracefully shut down. The server: [#!variable!server!#] is now off. [ Warning ] - The server: [#!variable!server!#] is not yet off after: [#!variable!wait!#] seconds. Giving up waiting. + [ Error ] - The server: [#!variable!server!#] can't be migrated to: [#!variable!target!#] because the resource: [#!variable!resource!#] isn't connected. The current connection state is: [#!variable!connection_state!#]. + [ Error ] - The server: [#!variable!server!#] can't be migrated to: [#!variable!target!#] because we can't reach it at all right now. Test