* In Server->shutdown_virsh(), disabled trying to resume a paused VM. Also updated the logging around not waiting for a VM to stop.

* Updated anvil-safe-stop to check for VMs running, even if the cluster is stopped, when --stop-servers is used.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 2 years ago
parent d271ffec26
commit e90dae96f7
  1. 34
      Anvil/Tools/Server.pm
  2. 3
      share/words.xml
  3. 94
      tools/anvil-safe-stop

@ -1959,21 +1959,22 @@ sub shutdown_virsh
} }
elsif ($status eq "paused") elsif ($status eq "paused")
{ {
### TODO: No, don't do this! The server might be migrating
# The server is paused. Resume it, wait a few, then proceed with the shutdown. # The server is paused. Resume it, wait a few, then proceed with the shutdown.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0314", variables => { server => $server }}); # $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0314", variables => { server => $server }});
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." resume $server"}); # my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." resume $server"});
if ($return_code) # if ($return_code)
{ # {
# Looks like virsh isn't running. # # Looks like virsh isn't running.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0315", variables => { # $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0315", variables => {
server => $server, # server => $server,
return_code => $return_code, # return_code => $return_code,
output => $output, # output => $output,
}}); # }});
$anvil->nice_exit({exit_code => 1}); # $anvil->nice_exit({exit_code => 1});
} # }
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0316"}); # $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0316"});
sleep 3; # sleep 3;
} }
elsif ($status eq "pmsuspended") elsif ($status eq "pmsuspended")
{ {
@ -2147,9 +2148,10 @@ WHERE
{ {
# Give up waiting. # Give up waiting.
$waiting = 0; $waiting = 0;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0426", variables => { waiting => $waiting }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0427", variables => { my $key = $wait_time == 1 ? "log_0727" : "log_0427";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => $key, variables => {
server => $server, server => $server,
'wait' => $wait_time, 'wait' => $wait_time,
}}); }});

@ -1372,6 +1372,8 @@ Note: This is a permanent action! If you protect this server again later, a full
<key name="job_0416">Done! The server: [#!variable!server!#] is no longer being protected on DR!</key> <key name="job_0416">Done! The server: [#!variable!server!#] is no longer being protected on DR!</key>
<key name="job_0417">The resource config file: [#!variable!config_file!#] doesn't exist locally, pulling a copy over from: [#!variable!source!#].</key> <key name="job_0417">The resource config file: [#!variable!config_file!#] doesn't exist locally, pulling a copy over from: [#!variable!source!#].</key>
<key name="job_0418">Re-parsing the replicated storage configuration.</key> <key name="job_0418">Re-parsing the replicated storage configuration.</key>
<key name="job_0419">The server: [#!variable!server!#] was found to be running outside the cluster. Asking it to shut down now.</key>
<key name="job_0428">The server: [#!variable!server!#] is still running two minutes after asking it to stop. It might have woken up on the first press and ignored the shutdown request (Hi Windows). Pressing the poewr button again.</key>
<!-- Log entries --> <!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key> <key name="log_0001">Starting: [#!variable!program!#].</key>
@ -2200,6 +2202,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0724">The file: [#!variable!file!#] needs to be added to the database, but since the last scan it's size grew from: [#!variable!old_size_bytes!# (#!variable!old_size_hr!#)] to: [#!variable!new_size_bytes!# (#!variable!new_size_hr!#)]. A difference of: [#!variable!difference_bytes!# (#!variable!difference_hr!#)]. It might still be being uploaded, so we'll keep checking periodocally until the size stops changing.</key> <key name="log_0724">The file: [#!variable!file!#] needs to be added to the database, but since the last scan it's size grew from: [#!variable!old_size_bytes!# (#!variable!old_size_hr!#)] to: [#!variable!new_size_bytes!# (#!variable!new_size_hr!#)]. A difference of: [#!variable!difference_bytes!# (#!variable!difference_hr!#)]. It might still be being uploaded, so we'll keep checking periodocally until the size stops changing.</key>
<key name="log_0725">Found the missing file: [#!variable!file!#] in the directory: [#!variable!directory!#]. Updating the database now.</key> <key name="log_0725">Found the missing file: [#!variable!file!#] in the directory: [#!variable!directory!#]. Updating the database now.</key>
<key name="log_0726">Deleting the hash key: [#!variable!hash_key!#].</key> <key name="log_0726">Deleting the hash key: [#!variable!hash_key!#].</key>
<key name="log_0727">[ Note ] - The server: [#!variable!server!#] is not yet off, but we've been told not to wait for it to stop.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -277,7 +277,20 @@ sub process_servers
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0321"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0321"});
$anvil->Job->update_progress({progress => 10, message => "job_0321"}); $anvil->Job->update_progress({progress => 10, message => "job_0321"});
} }
# Use virsh to check for servers, in case pacemaker lies to us.
$anvil->Server->find();
my $progress = 10;
my $waiting = 1; my $waiting = 1;
my $first_try = 0;
my $second_try = 0;
my $try_again = 0;
my $server_count = keys %{$anvil->data->{server}{location}};
my $progress_steps = $server_count ? int(35 / $server_count) : 70;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:server_count' => $server_count,
's2:progress_steps' => $progress_steps,
}});
while ($waiting) while ($waiting)
{ {
# Is the cluster up? # Is the cluster up?
@ -286,7 +299,84 @@ sub process_servers
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if ($problem) if ($problem)
{ {
# Nope. # Nope. Are we stopping servers?
if ($anvil->data->{switches}{'stop-servers'})
{
# Yes, are any servers running (check virsh)
foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{server}{location}})
{
my $status = $anvil->data->{server}{location}{$server_name}{status};
my $host_name = $anvil->data->{server}{location}{$server_name}{host_name};
$progress += $progress_steps;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:server_name' => $server_name,
's2:status' => $status,
's3:host_name' => $host_name,
's4:progress' => $progress,
}});
if ($host_name eq $anvil->Get->host_name)
{
# Server is still running.
if (($status eq "running") && (not $first_try))
{
# It's running despite the cluster being own, stop it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0419", variables => { server => $server_name }});
$anvil->Job->update_progress({progress => $progress, message => "job_0419,!!server!".$server_name."!!"});
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server_name,
wait_time => 1,
});
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
elsif (($status eq "in shutdown") && ($try_again))
{
# Hit the power button again.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0420", variables => { server => $server_name }});
$anvil->Job->update_progress({progress => $progress, message => "job_0420,!!server!".$server_name."!!"});
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server_name,
wait_time => 1,
});
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
}
if ($waiting)
{
if (not $first_try)
{
$first_try = time;
$second_try = $first_try + 120;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
first_try => $first_try,
second_try => $second_try,
}});
}
elsif ($try_again)
{
$try_again = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { try_again => $try_again }});
}
elsif (($second_try) && (time > $second_try))
{
$try_again = 1;
$second_try = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
second_try => $second_try,
try_again => $try_again,
}});
}
}
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
$anvil->Job->update_progress({progress => 80, message => "job_0313"}); $anvil->Job->update_progress({progress => 80, message => "job_0313"});
} }
@ -304,7 +394,7 @@ sub process_servers
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:server' => $server, 's1:server' => $server,
's2:status' => $status, 's2:status' => $status,
's2:host_name' => $host_name, 's3:host_name' => $host_name,
's4:role' => $role, 's4:role' => $role,
's5:active' => $active, 's5:active' => $active,
}}); }});

Loading…
Cancel
Save