* Made good progress on anvil-safe-stop. It will now stop or migrate servers (testing needed).

* Updated Server->shutdown_virsh() to change the parameter 'wait' to 'wait_time' to clarify its use.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent 27259d1d53
commit 3a6902d899
  1. 5
      Anvil/Tools/Cluster.pm
  2. 27
      Anvil/Tools/Server.pm
  3. 6
      ocf/alteeve/server
  4. 9
      share/words.xml
  5. 2
      tools/anvil-delete-server
  6. 1
      tools/anvil-migrate-server
  7. 21
      tools/anvil-safe-start
  8. 343
      tools/anvil-safe-stop

@ -2277,7 +2277,7 @@ sub parse_cib
} }
# Now call 'crm_mon --output-as=xml' to determine which resource are running where. As of the time # Now call 'crm_mon --output-as=xml' to determine which resource are running where. As of the time
# of writting this (late 2020), stopped resources are not displayed. So the principle purpose of this # of writing this (late 2020), stopped resources are not displayed. So the principle purpose of this
# call is to determine what resources are running, and where they are running. # call is to determine what resources are running, and where they are running.
$anvil->Cluster->parse_crm_mon({ $anvil->Cluster->parse_crm_mon({
debug => $debug, debug => $debug,
@ -2349,7 +2349,6 @@ sub parse_cib
} }
} }
$anvil->data->{cib}{parsed}{data}{server}{$server}{status} = $status; $anvil->data->{cib}{parsed}{data}{server}{$server}{status} = $status;
$anvil->data->{cib}{parsed}{data}{server}{$server}{host_name} = $host_name; $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name} = $host_name;
$anvil->data->{cib}{parsed}{data}{server}{$server}{host_id} = $host_id; $anvil->data->{cib}{parsed}{data}{server}{$server}{host_id} = $host_id;
@ -2565,7 +2564,7 @@ sub parse_quorum
my $parameter = shift; my $parameter = shift;
my $anvil = $self->parent; my $anvil = $self->parent;
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->shutdown_server()" }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->parse_quorum()" }});
my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{'corosync-quorumtool'}." -p -s"}); my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{'corosync-quorumtool'}." -p -s"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {

@ -1713,7 +1713,7 @@ B<WARNING>: Setting this to C<< 1 >> results in the immediate shutdown of the se
This is the name of the server (as it appears in C<< virsh >>) to shut down. This is the name of the server (as it appears in C<< virsh >>) to shut down.
=head3 wait (optional, default '0') =head3 wait_time (optional, default '0', wait indefinitely)
By default, this method will wait indefinetly for the server to shut down before returning. If this is set to a non-zero number, the method will wait that number of seconds for the server to shut dwwn. If the server is still not off by then, C<< 0 >> is returned. By default, this method will wait indefinetly for the server to shut down before returning. If this is set to a non-zero number, the method will wait that number of seconds for the server to shut dwwn. If the server is still not off by then, C<< 0 >> is returned.
@ -1726,14 +1726,15 @@ sub shutdown_virsh
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Server->shutdown_virsh()" }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Server->shutdown_virsh()" }});
my $server = defined $parameter->{server} ? $parameter->{server} : ""; my $server = defined $parameter->{server} ? $parameter->{server} : "";
my $force = defined $parameter->{force} ? $parameter->{force} : 0; my $force = defined $parameter->{force} ? $parameter->{force} : 0;
my $wait = defined $parameter->{'wait'} ? $parameter->{'wait'} : 0; my $wait_time = defined $parameter->{wait_time} ? $parameter->{wait_time} : 0;
my $success = 0; my $success = 0;
my $server_uuid = ""; my $server_uuid = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
force => $force, force => $force,
server => $server, server => $server,
wait_time => $wait_time,
}}); }});
if (not $server) if (not $server)
@ -1741,10 +1742,10 @@ sub shutdown_virsh
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->shutdown_virsh()", parameter => "server" }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->shutdown_virsh()", parameter => "server" }});
return($success); return($success);
} }
if (($wait) && ($wait =~ /\D/)) if (($wait_time) && ($wait_time =~ /\D/))
{ {
# Bad value. # Bad value.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0422", variables => { server => $server, 'wait' => $wait }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0422", variables => { server => $server, wait_time => $wait_time }});
return($success); return($success);
} }
@ -1895,12 +1896,12 @@ WHERE
# Wait indefinetely for the server to exit. # Wait indefinetely for the server to exit.
my $stop_waiting = 0; my $stop_waiting = 0;
if ($wait) if ($wait_time)
{ {
$stop_waiting = time + $wait; $stop_waiting = time + $wait_time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { stop_waiting => $stop_waiting }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { stop_waiting => $stop_waiting }});
}; };
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 'wait' => $wait }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { wait_time => $wait_time }});
until($success) until($success)
{ {
# Update # Update
@ -1959,8 +1960,8 @@ WHERE
{ {
# Give up waiting. # Give up waiting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0427", variables => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0427", variables => {
server => $server, server => $server,
'wait' => $wait, wait_time => $wait_time,
}}); }});
} }
else else

@ -983,11 +983,11 @@ sub stop_server
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0582", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0582", variables => { server => $server }});
# Read in an parse the server's XML. # Read in an parse the server's XML.
$anvil->System->check_storage({debug => 3}); $anvil->System->check_storage();
$anvil->Server->get_status({debug => 3, server => $server}); $anvil->Server->get_status({server => $server});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0313", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0313", variables => { server => $server }});
my $success = $anvil->Server->shutdown_virsh({debug => 3, server => $server}); my $success = $anvil->Server->shutdown_virsh({server => $server});
if (not $success) if (not $success)
{ {
# Something went wrong. Details should be in the logs. # Something went wrong. Details should be in the logs.

@ -830,6 +830,14 @@ It should be provisioned in the next minute or two.</key>
<key name="job_0309">Verifying that the server name: [#!variable!server_name!#] is not defined.</key> <key name="job_0309">Verifying that the server name: [#!variable!server_name!#] is not defined.</key>
<key name="job_0310">Verifying that the server name: [#!variable!server_name!#] is not defined on: [#!variable!host_name!#].</key> <key name="job_0310">Verifying that the server name: [#!variable!server_name!#] is not defined on: [#!variable!host_name!#].</key>
<key name="job_0311">Renamed the server name to: [#!variable!server_name!#] in the database.</key> <key name="job_0311">Renamed the server name to: [#!variable!server_name!#] in the database.</key>
<key name="job_0312">We are the SyncSource for the peer: [#!variable!peer_host!#] for the resource/volume: [#!variable!resource!#/#!variable!volume!#]. We have to wait for the peer to complete the sync or close its connection before we can proceed with shut down.</key>
<key name="job_0313">The cluster has stopped.</key>
<key name="job_0314">Stopping all DRBD resources.</key>
<key name="job_0315">The server: [#!variable!server_name!#] is migrating. Will check again shortly to see if it is done.</key>
<key name="job_0316">Asking the cluster to shut down the server: [#!variable!server_name!#] now.</key>
<key name="job_0317">The server: [#!variable!server!#] has not shut down yet. Asking 'virsh' to shut it down. If the cluster stop woke it up, this should trigger a shutdown. If not, manual shutdown will be required.</key>
<key name="job_0318">The server: [#!variable!server!#] will now be migrated to: [#!variable!node!#]. This could take some time, depending on the amount of RAM allocated to the server, the speed of the BCN and the activity on the server. Please be patient!</key>
<key name="job_0319">No servers are running on this node now.</key>
<!-- Log entries --> <!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key> <key name="log_0001">Starting: [#!variable!program!#].</key>
@ -1872,6 +1880,7 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty
<key name="message_0232">The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding.</key> <key name="message_0232">The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding.</key>
<key name="message_0233">It appears that another instance of 'anvil-safe-start' is already runing. Please wait for it to complete (or kill it manually if needed).</key> <key name="message_0233">It appears that another instance of 'anvil-safe-start' is already runing. Please wait for it to complete (or kill it manually if needed).</key>
<key name="message_0234">Preparing to rename a server.</key> <key name="message_0234">Preparing to rename a server.</key>
<key name="message_0235">Preparing to stop this node.</key>
<!-- Success messages shown to the user --> <!-- Success messages shown to the user -->
<key name="ok_0001">Saved the mail server information successfully!</key> <key name="ok_0001">Saved the mail server information successfully!</key>

@ -260,7 +260,6 @@ sub remove_from_pacemaker
my $success = $anvil->Server->shutdown_virsh({ my $success = $anvil->Server->shutdown_virsh({
debug => 2, debug => 2,
force => 1, force => 1,
'wait' => 1,
server => $server_name, server => $server_name,
}); });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }});
@ -307,7 +306,6 @@ sub remove_from_pacemaker
my $success = $anvil->Server->shutdown_virsh({ my $success = $anvil->Server->shutdown_virsh({
debug => 2, debug => 2,
force => 1, force => 1,
'wait' => 1,
server => $server_name, server => $server_name,
}); });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }});

@ -30,7 +30,6 @@ my $anvil = Anvil::Tools->new();
# passed directly, it will be used. Otherwise, the password will be read from the database. # passed directly, it will be used. Otherwise, the password will be read from the database.
$anvil->data->{switches}{'job-uuid'} = ""; $anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{'no-wait'} = ""; # We normall wait for each migation to finish. This skips that. With '--all', this causes all migrations to run in parallel $anvil->data->{switches}{'no-wait'} = ""; # We normall wait for each migation to finish. This skips that. With '--all', this causes all migrations to run in parallel
$anvil->data->{switches}{'parallel'} = "";
$anvil->data->{switches}{'server'} = ""; $anvil->data->{switches}{'server'} = "";
$anvil->data->{switches}{'server-uuid'} = ""; $anvil->data->{switches}{'server-uuid'} = "";
$anvil->data->{switches}{'target'} = ""; $anvil->data->{switches}{'target'} = "";

@ -12,6 +12,7 @@
# 1 = Any problem that causes an early exit. # 1 = Any problem that causes an early exit.
# #
# TODO: # TODO:
# - Add job support
# - Make this work on DR hosts. # - Make this work on DR hosts.
# - 'pcs quorum unblock' could be useful in sole-survivor cold starts. # - 'pcs quorum unblock' could be useful in sole-survivor cold starts.
# #
@ -50,13 +51,21 @@ if (($< != 0) && ($> != 0))
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
$anvil->data->{switches}{disable} = ""; $anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{enable} = ""; $anvil->data->{switches}{disable} = "";
$anvil->data->{switches}{force} = ""; $anvil->data->{switches}{enable} = "";
$anvil->data->{switches}{'local'} = ""; $anvil->data->{switches}{force} = "";
$anvil->data->{switches}{status} = ""; $anvil->data->{switches}{'local'} = "";
$anvil->data->{switches}{status} = "";
$anvil->Get->switches; $anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::disable' => $anvil->data->{switches}{disable},
'switches::enable' => $anvil->data->{switches}{enable},
'switches::force' => $anvil->data->{switches}{force},
'switches::local' => $anvil->data->{switches}{'local'},
'switches::status' => $anvil->data->{switches}{status},
}});
# If I have no databases, sleep until I do # If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})

@ -13,6 +13,7 @@ use strict;
use warnings; use warnings;
use Anvil::Tools; use Anvil::Tools;
require POSIX; require POSIX;
use Data::Dumper;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
@ -25,7 +26,18 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
$| = 1; $| = 1;
my $anvil = Anvil::Tools->new(); my $anvil = Anvil::Tools->new();
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{'power-off'} = ""; # By default, the node is withdrawn. With this switch, the node will power off as well.
$anvil->data->{switches}{'stop-reason'} = ""; # Optionally used to set 'system::stop_reason' reason for this host. Valid values are 'user', 'power' and 'thermal'.
$anvil->data->{switches}{'stop-servers'} = ""; # Default behaviour is to migrate servers to the peer, if the peer is up. This overrides that and forces hosted servers to shut down.
$anvil->Get->switches; $anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::power-off' => $anvil->data->{switches}{'power-off'},
'switches::stop-reason' => $anvil->data->{switches}{'stop-reason'},
'switches::stop-servers' => $anvil->data->{switches}{'stop-servers'},
}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Make sure we're running as 'root' # Make sure we're running as 'root'
@ -37,36 +49,333 @@ if (($< != 0) && ($> != 0))
$anvil->nice_exit({exit_code => 1}); $anvil->nice_exit({exit_code => 1});
} }
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, secure => 0, key => "log_0132"});
# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})
{ {
# If this is a dashboard, try to configure and then connect to the local database. If this isn't a # No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# Wait until we have one. # again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
# If we don't have a job UUID, try to find one.
if (not $anvil->data->{switches}{'job-uuid'})
{
# Load the job data.
$anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({program => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }});
}
# If we have a job-uuid, pick up the job and load the switch values from the job data. Otherwise, the command line switches are used as-is.
if ($anvil->data->{switches}{'job-uuid'})
{
# Load the job data.
$anvil->Job->clear();
$anvil->Job->get_job_details();
$anvil->Job->update_progress({
progress => 1,
job_picked_up_by => $$,
job_picked_up_at => time,
message => "message_0235",
});
until($anvil->data->{sys}{database}{connections}) # Pull out the job data.
foreach my $line (split/\n/, $anvil->data->{jobs}{job_data})
{ {
sleep 10; if ($line =~ /power-off=(.*?)$/)
{
$anvil->refresh(); $anvil->data->{switches}{'power-off'} = $1;
$anvil->Database->connect(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); 'switches::power-off' => $anvil->data->{switches}{'power-off'},
if (not $anvil->data->{sys}{database}{connections}) }});
}
if ($line =~ /stop-reason=(.*?)$/)
{
$anvil->data->{switches}{'stop-reason'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::stop-reason' => $anvil->data->{switches}{'stop-reason'},
}});
}
if ($line =~ /stop-servers=(.*?)$/)
{ {
# Keep waiting $anvil->data->{switches}{'stop-servers'} = $1;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::stop-servers' => $anvil->data->{switches}{'stop-servers'},
}});
} }
} }
} }
# Make sure we're in an Anvil!
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
if (not $anvil->data->{sys}{anvil_uuid})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
$anvil->Job->update_progress({progress => 100, message => "error_0260"});
$anvil->nice_exit({exit_code => 1});
}
# Migrate or stop the servers, if any servers are running here.
process_servers($anvil);
# This waits on DRBD if we're SyncSource
wait_on_drbd($anvil);
exit(0);
# This stops pacemaker, migrating or shutting down servers before hand. It will also shut
stop_cluster($anvil);
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});
############################################################################################################# #############################################################################################################
# Functions # # Functions #
############################################################################################################# #############################################################################################################
# This migrates or shuts down the servers running on this node, looping until nothing is left here.
# 
# On each pass the CIB is parsed and every server in it is checked;
# - A server that is migrating is simply waited on.
# - A server hosted on this node is shut down if '--stop-servers' was used (first via the cluster and, if it 
#   is still running after a grace period, via 'virsh'). Otherwise, it is migrated to the peer.
# 
# Parameters;
# - $anvil: The Anvil::Tools object.
# 
# Returns '0' once no server is migrating or running on this node, or when parse_cib() reports a problem 
# (which is treated as the cluster being stopped).
sub process_servers
{
	my ($anvil) = @_;
	
	my $waiting = 1;
	while ($waiting)
	{
		# Assume we're done. Any server that is migrating, or still running here, sets this back to 
		# '1' so that we loop again.
		$waiting = 0;
		
		# Is the cluster up?
		my $problem = $anvil->Cluster->parse_cib({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		if ($problem)
		{
			# Nope.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
			$anvil->Job->update_progress({progress => 90, message => "job_0313"});
		}
		else
		{
			# Loop through the servers known to the cluster.
			my $local_name = $anvil->data->{cib}{parsed}{'local'}{name};
			my $peer_name  = $anvil->data->{cib}{parsed}{peer}{name};
			foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
			{
				my $status    = $anvil->data->{cib}{parsed}{data}{server}{$server}{status};
				my $host_name = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name};
				my $role      = $anvil->data->{cib}{parsed}{data}{server}{$server}{role};
				my $active    = $anvil->data->{cib}{parsed}{data}{server}{$server}{active};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					's1:server'    => $server,
					's2:status'    => $status,
					's3:host_name' => $host_name,
					's4:role'      => $role,
					's5:active'    => $active,
				}});
				
				if (lc($role) eq "migrating")
				{
					# No matter what, if a server is migrating, we wait. Note that the 
					# 'job_0315' string uses the variable name 'server_name'.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
					
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0315", variables => { server_name => $server }});
					$anvil->Job->update_progress({progress => 30, message => "job_0315,!!server_name!".$server."!!"});
				}
				elsif ($host_name eq $local_name)
				{
					# Something is running here.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
					
					# This is ours. How shall we deal with it?
					if ($anvil->data->{switches}{'stop-servers'})
					{
						# Have we tried to stop it already? If not, use pcs. If so, 
						# and if it's been more than 120 seconds, use virsh to try
						# again.
						if (not exists $anvil->data->{server_shutdown}{$server})
						{
							# Use PCS. The 'job_0316' string uses the variable 
							# name 'server_name'.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0316", variables => { server_name => $server }});
							$anvil->Job->update_progress({progress => 30, message => "job_0316,!!server_name!".$server."!!"});
							$anvil->Cluster->shutdown_server({
								debug  => 2,
								server => $server,
								'wait' => 0,
							});
							
							# Record that pcs was asked, and when to fall back to
							# virsh if the server is still running.
							$anvil->data->{server_shutdown}{$server}{pcs_called}    = 1;
							$anvil->data->{server_shutdown}{$server}{virsh_called}  = 0;
							$anvil->data->{server_shutdown}{$server}{call_virsh_at} = time + 120;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
								"server_shutdown::${server}::pcs_called"    => $anvil->data->{server_shutdown}{$server}{pcs_called},
								"server_shutdown::${server}::virsh_called"  => $anvil->data->{server_shutdown}{$server}{virsh_called},
								"server_shutdown::${server}::call_virsh_at" => $anvil->data->{server_shutdown}{$server}{call_virsh_at},
							}});
						}
						elsif ((not $anvil->data->{server_shutdown}{$server}{virsh_called}) && (time > $anvil->data->{server_shutdown}{$server}{call_virsh_at}))
						{
							# Use virsh. A 'wait_time' of '0' would wait 
							# indefinitely, so we use '1' to get back to this 
							# polling loop promptly.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0317", variables => { server => $server }});
							$anvil->Job->update_progress({progress => 30, message => "job_0317,!!server!".$server."!!"});
							$anvil->Server->shutdown_virsh({
								debug     => 2,
								server    => $server,
								wait_time => 1,
							});
							
							# Only ask virsh once per server.
							$anvil->data->{server_shutdown}{$server}{virsh_called} = 1;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
								"server_shutdown::${server}::virsh_called" => $anvil->data->{server_shutdown}{$server}{virsh_called},
							}});
						}
					}
					else
					{
						### TODO: Calculate how many gigs worth of RAM we'll migrate,
						###       and advance the "progress" by the percentage each 
						###       server's RAM represents of the total
						# Migrate the server to the peer.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0318", variables => { 
							server => $server,
							node   => $peer_name,
						}});
						$anvil->Job->update_progress({progress => 30, message => "job_0318,!!server!".$server."!!,!!node!".$peer_name."!!"});
						$anvil->Cluster->migrate_server({
							server => $server,
							node   => $peer_name,
							'wait' => 1,
						});
					}
				}
			}
		}
		
		# Give things time to change before checking again.
		if ($waiting)
		{
			sleep 5;
		}
	}
	
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0319"});
	$anvil->Job->update_progress({progress => 30, message => "job_0319"});
	
	return(0);
}
# This is the cluster-withdrawal step of the shutdown. 
# 
# NOTE: As written, this only waits until the CIB can be parsed and both the local and peer nodes report 
#       'ready', updating the job progress while it waits. The actual withdrawal from the cluster is not 
#       implemented here yet.
# 
# Parameters;
# - $anvil: The Anvil::Tools object.
# 
# Returns '0' once both nodes are ready.
sub stop_cluster
{
	my ($anvil) = @_;
	
	# Loop until the cluster is up and both nodes are ready.
	my $waiting = 1;
	while ($waiting)
	{
		my $problem = $anvil->Cluster->parse_cib({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		if (not $problem)
		{
			# Read each node's ready state under its own name.
			my $local_name  = $anvil->data->{cib}{parsed}{'local'}{name};
			my $peer_name   = $anvil->data->{cib}{parsed}{peer}{name};
			my $local_ready = $anvil->data->{cib}{parsed}{data}{node}{$local_name}{node_state}{ready};
			my $peer_ready  = $anvil->data->{cib}{parsed}{data}{node}{$peer_name}{node_state}{ready};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				local_name  => $local_name, 
				peer_name   => $peer_name, 
				local_ready => $local_ready, 
				peer_ready  => $peer_ready, 
			}});
			
			if (($local_ready) && ($peer_ready))
			{
				# We're good.
				$waiting = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
				
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0295"});
				$anvil->Job->update_progress({progress => 15, message => "job_0295"});
			}
			else
			{
				# One or both nodes are not online yet.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0296", variables => { 
					local_name  => $local_name, 
					peer_name   => $peer_name, 
					local_ready => $local_ready, 
					peer_ready  => $peer_ready, 
				}});
				$anvil->Job->update_progress({progress => 10, message => "job_0296,!!local_name!".$local_name."!!,!!peer_name!".$peer_name."!!,!!local_ready!".$local_ready."!!,!!peer_ready!".$peer_ready."!!"});
			}
		}
		else
		{
			# Cluster hasn't started.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"});
			$anvil->Job->update_progress({progress => 5, message => "job_0277"});
		}
		
		# Pause before checking again.
		if ($waiting)
		{
			sleep 10;
		}
	}
	
	return(0);
}
# This watches DRBD and waits until this node is no longer SyncSource for any peer, then takes all DRBD 
# resources down.
# 
# While any volume's replication state matches 'SyncSource', the status is re-read every ten seconds and the
# job progress is updated with the resource, volume and peer being waited on.
# 
# Parameters;
# - $anvil: The Anvil::Tools object.
# 
# Returns '0' after 'drbdadm down all' has been called.
sub wait_on_drbd
{
	my ($anvil) = @_;
	
	my $short_host_name = $anvil->Get->short_host_name();
	my $waiting         = 1;
	while ($waiting)
	{
		# (Re)fresh my view of the storage. We assume we're done until a SyncSource volume is found.
		$waiting = 0;
		$anvil->DRBD->get_status({debug => 2});
		
		# Now check to see if anything is sync'ing.
		foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}})
		{
			foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}})
			{
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_name => $peer_name }});
				
				# Walk the volume names only ('keys'); iterating the hash itself would also 
				# walk the values and autovivify bogus volume entries in the check below.
				foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}{$peer_name}{volume}})
				{
					next if not exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}{$peer_name}{volume}{$volume}{'replication-state'};
					my $replication_state = $anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}{$peer_name}{volume}{$volume}{'replication-state'};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						volume            => $volume,
						replication_state => $replication_state, 
					}});
					
					if ($replication_state =~ /SyncSource/i)
					{
						# The peer is still sync'ing from us, keep waiting.
						$waiting = 1;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
						
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0312", variables => { 
							peer_host => $peer_name,
							resource  => $server_name,
							volume    => $volume,
						}});
						$anvil->Job->update_progress({progress => 30, message => "job_0312,!!peer_host!".$peer_name."!!,!!resource!".$server_name."!!,!!volume!".$volume."!!"});
					}
				}
			}
		}
		
		# Pause before checking again.
		if ($waiting)
		{
			sleep 10;
		}
	}
	
	# All servers should be down now, so stop DRBD.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0314"});
	$anvil->Job->update_progress({progress => 50, message => "job_0314"});
	
	my $shell_call = $anvil->data->{path}{exe}{drbdadm}." down all";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	# Actually run the call, and record the result so that a failure shows up in the logs.
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	
	return(0);
}

Loading…
Cancel
Save