* anvil-safe-stop is complete! Testing still needed, of course.

* Updated DRBD->manage_resource() to call 'drbdadm adjust <res>' when starting a resource to help deal with a periodic issue where the 'allow-two-primary' option on the peer doesn't match the local setting.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent edf749ae78
commit f202187c34
  1. 35
      Anvil/Tools/DRBD.pm
  2. 9
      share/words.xml
  3. 185
      tools/anvil-safe-stop

@ -1661,6 +1661,41 @@ sub manage_resource
### TODO: When taking down a resource, check to see if any machine is SyncTarget and take it/them ### TODO: When taking down a resource, check to see if any machine is SyncTarget and take it/them
### down first. See anvil-rename-server -> verify_server_is_off() for the logic. ### down first. See anvil-rename-server -> verify_server_is_off() for the logic.
### TODO: Sanity check the resource name and task requested. ### TODO: Sanity check the resource name and task requested.
### NOTE: For an unknown reason, sometimes a resource is left with allow-two-primary enabled. This
### can block startup, so to be safe, we call 'drbdadm adjust <resource>' before bringing it up.
if ($task eq "up")
{
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource;
my $output = "";
my $return_code = 255;
if ($anvil->Network->is_local({host => $target}))
{
# Local.
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output,
return_code => $return_code,
}});
}
else
{
# Remote call.
($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug,
shell_call => $shell_call,
target => $target,
port => $port,
password => $password,
remote_user => $remote_user,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
}
}
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource;
my $output = ""; my $output = "";
my $return_code = 255; my $return_code = 255;

@ -834,10 +834,17 @@ It should be provisioned in the next minute or two.</key>
<key name="job_0313">The cluster has stopped.</key> <key name="job_0313">The cluster has stopped.</key>
<key name="job_0314">Stopping all DRBD resources.</key> <key name="job_0314">Stopping all DRBD resources.</key>
<key name="job_0315">The server: [#!variable!server_name!#] is migrating. Will check again shortly to see if it is done.</key> <key name="job_0315">The server: [#!variable!server_name!#] is migrating. Will check again shortly to see if it is done.</key>
<key name="job_0316">Asking the cluster to shut down the server: [#!variable!server_name!#] now.</key> <key name="job_0316">Asking the cluster to shut down the server: [#!variable!server!#] now.</key>
<key name="job_0317">The server: [#!variable!server!#] has not shut down yet. Asking 'virsh' to shut it down. If the cluster stop woke it up, this should trigger a shutdown. If not, manual shutdown will be required.</key> <key name="job_0317">The server: [#!variable!server!#] has not shut down yet. Asking 'virsh' to shut it down. If the cluster stop woke it up, this should trigger a shutdown. If not, manual shutdown will be required.</key>
<key name="job_0318">The server: [#!variable!server!#] will now be migrated to: [#!variable!node!#]. This could take some time, depending on the amount of RAM allocated to the server, the speed of the BCN and the activity on the server. Please be patient!</key> <key name="job_0318">The server: [#!variable!server!#] will now be migrated to: [#!variable!node!#]. This could take some time, depending on the amount of RAM allocated to the server, the speed of the BCN and the activity on the server. Please be patient!</key>
<key name="job_0319">No servers are running on this node now.</key> <key name="job_0319">No servers are running on this node now.</key>
<key name="job_0320">Will now shut down any servers running on the cluster.</key>
<key name="job_0321">Will now migrate any servers running on the cluster.</key>
<key name="job_0322">Checking to see if we're "SyncSource" for any peer's replicated storage.</key>
<key name="job_0323">Withdrawing this node from the cluster now.</key>
<key name="job_0324">Waiting for the node to finish withdrawing from the cluster.</key>
<key name="job_0325">Shutdown complete, powering off now.</key>
<key name="job_0326">Done. This node is no longer in the cluster.</key>
<!-- Log entries --> <!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key> <key name="log_0001">Starting: [#!variable!program!#].</key>

@ -38,7 +38,7 @@ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list
'switches::stop-servers' => $anvil->data->{switches}{'stop-servers'}, 'switches::stop-servers' => $anvil->data->{switches}{'stop-servers'},
}}); }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Make sure we're running as 'root' # Make sure we're running as 'root'
# $< == real UID, $> == effective UID # $< == real UID, $> == effective UID
@ -50,7 +50,7 @@ if (($< != 0) && ($> != 0))
} }
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})
{ {
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try # No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
@ -123,24 +123,117 @@ process_servers($anvil);
# This waits on DRBD if we're SyncSource # This waits on DRBD if we're SyncSource
wait_on_drbd($anvil); wait_on_drbd($anvil);
# This stops pacemaker
stop_cluster($anvil);
exit(0); # Are we powering off?
if ($anvil->data->{switches}{'power-off'})
{
# Yup
$anvil->Database->update_host_status({
debug => 2,
host_uuid => $anvil->Get->host_uuid,
host_status => "stopping",
});
# This stops pacemaker, migrating or shutting down servers before hand. It will also shut $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0325"});
stop_cluster($anvil); $anvil->Job->update_progress({progress => 100, message => "job_0325"});
my $shell_call = $anvil->data->{path}{exe}{systemctl}." poweroff";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
# Unlikely we're still alive, but 'poweroff' does return once enqueued, so...
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
else
{
# We're not shutting down, so we're done
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0326"});
$anvil->Job->update_progress({progress => 100, message => "job_0326"});
}
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});
############################################################################################################# #############################################################################################################
# Functions # # Functions #
############################################################################################################# #############################################################################################################
# This withdraws the local node from the cluster and waits for the withdrawal to finish.
# 
# It loops on Cluster->parse_cib(). A true return value is treated as "the cluster is no longer running
# here", at which point the loop ends. While the CIB can still be parsed, 'pcs cluster stop' is called
# (once only), and on later passes the user is told we're still waiting. There is a five second pause
# between checks.
# 
# Parameters;
#  $anvil - The handle used for the Cluster, Log, Job and System calls.
# 
# Returns '0' once the cluster has stopped. Note that the return code from 'pcs cluster stop' is logged
# but not acted on, so this will keep waiting for as long as the CIB remains readable.
sub stop_cluster
{
	my ($anvil) = @_;
	
	# Set once 'pcs cluster stop' has been called, so that we only ask the cluster to stop one time.
	my $pacemaker_stopped = 0;
	while (1)
	{
		my $problem = $anvil->Cluster->parse_cib({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		if ($problem)
		{
			# Cluster has stopped. This is reported above the 'waiting' progress (80) so that the
			# job's progress never moves backwards before the caller sets it to 100.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
			$anvil->Job->update_progress({progress => 90, message => "job_0313"});
			last;
		}
		
		if (not $pacemaker_stopped)
		{
			# Stop pacemaker now.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0323"});
			$anvil->Job->update_progress({progress => 70, message => "job_0323"});
			
			my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster stop";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output,
				return_code => $return_code, 
			}});
			
			$pacemaker_stopped = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pacemaker_stopped => $pacemaker_stopped }});
		}
		else
		{
			# The stop was already requested, we're still waiting for the node to withdraw.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0324"});
			$anvil->Job->update_progress({progress => 80, message => "job_0324"});
		}
		
		# The cluster is still up, give it a moment before checking again.
		sleep 5;
	}
	
	return(0);
}
# This will migrate or stop # This will migrate or stop
sub process_servers sub process_servers
{ {
my ($anvil) = @_; my ($anvil) = @_;
if ($anvil->data->{switches}{'stop-servers'})
{
# Tell the user we're about to shut down servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0320"});
$anvil->Job->update_progress({progress => 10, message => "job_0320"});
}
else
{
# Tell the user we're about to migrate servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0321"});
$anvil->Job->update_progress({progress => 10, message => "job_0321"});
}
my $waiting = 1; my $waiting = 1;
while ($waiting) while ($waiting)
{ {
@ -152,7 +245,7 @@ sub process_servers
{ {
# Nope. # Nope.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
$anvil->Job->update_progress({progress => 90, message => "job_0313"}); $anvil->Job->update_progress({progress => 80, message => "job_0313"});
} }
else else
{ {
@ -172,6 +265,7 @@ sub process_servers
's4:role' => $role, 's4:role' => $role,
's5:active' => $active, 's5:active' => $active,
}}); }});
next if lc($role) eq "stopped";
if (lc($role) eq "migrating") if (lc($role) eq "migrating")
{ {
@ -180,7 +274,7 @@ sub process_servers
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0315", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0315", variables => { server => $server }});
$anvil->Job->update_progress({progress => 30, message => "job_0315,!!server!".$server."!!"}); $anvil->Job->update_progress({progress => 20, message => "job_0315,!!server!".$server."!!"});
} }
elsif ($host_name eq $local_name) elsif ($host_name eq $local_name)
{ {
@ -198,7 +292,7 @@ sub process_servers
{ {
# Use PCS. # Use PCS.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0316", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0316", variables => { server => $server }});
$anvil->Job->update_progress({progress => 30, message => "job_0316,!!server!".$server."!!"}); $anvil->Job->update_progress({progress => 20, message => "job_0316,!!server!".$server."!!"});
$anvil->Cluster->shutdown_server({ $anvil->Cluster->shutdown_server({
debug => 2, debug => 2,
server => $server, server => $server,
@ -217,7 +311,7 @@ sub process_servers
{ {
# Use virsh # Use virsh
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0317", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0317", variables => { server => $server }});
$anvil->Job->update_progress({progress => 30, message => "job_0317,!!server!".$server."!!"}); $anvil->Job->update_progress({progress => 20, message => "job_0317,!!server!".$server."!!"});
$anvil->Cluster->shutdown_server({ $anvil->Cluster->shutdown_server({
debug => 2, debug => 2,
server => $server, server => $server,
@ -239,7 +333,7 @@ sub process_servers
server => $server, server => $server,
node => $peer_name, node => $peer_name,
}}); }});
$anvil->Job->update_progress({progress => 30, message => "job_0318,!!server!".$server."!!,!!node!".$peer_name."!!"}); $anvil->Job->update_progress({progress => 20, message => "job_0318,!!server!".$server."!!,!!node!".$peer_name."!!"});
$anvil->Cluster->migrate_server({ $anvil->Cluster->migrate_server({
server => $server, server => $server,
node => $peer_name, node => $peer_name,
@ -257,65 +351,6 @@ sub process_servers
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0319"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0319"});
$anvil->Job->update_progress({progress => 30, message => "job_0319"}); $anvil->Job->update_progress({progress => 30, message => "job_0319"});
exit(0);
return(0);
}
# This waits until the cluster's CIB can be parsed and both 'ready' flags read below are set, checking
# again every ten seconds. It returns '0' when done.
# NOTE(review): Despite the name, nothing in this body stops the cluster or touches any servers.
sub stop_cluster
{
my ($anvil) = @_;
# Loop until the CIB parses cleanly and both nodes are seen as ready.
my $waiting = 1;
while($waiting)
{
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if (not $problem)
{
my $local_name = $anvil->data->{cib}{parsed}{'local'}{name};
my $peer_name = $anvil->data->{cib}{parsed}{peer}{name};
my $local_ready = $anvil->data->{cib}{parsed}{data}{node}{$local_name}{node_state}{ready};
# NOTE(review): This reads the entry for '$local_name', not '$peer_name', so 'peer_ready' always
# mirrors 'local_ready'. This looks like a copy/paste slip - confirm.
my $peer_ready = $anvil->data->{cib}{parsed}{data}{node}{$local_name}{node_state}{ready};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_name => $local_name,
peer_name => $peer_name,
local_ready => $local_ready,
peer_ready => $peer_ready,
}});
if (($local_ready) && ($peer_ready))
{
# We're good.
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0295"});
$anvil->Job->update_progress({progress => 15, message => "job_0295"});
}
else
{
# One or both nodes are not online yet.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0296", variables => {
local_name => $local_name,
peer_name => $peer_name,
local_ready => $local_ready,
peer_ready => $peer_ready,
}});
$anvil->Job->update_progress({progress => 10, message => "job_0296,!!local_name!".$local_name."!!,!!peer_name!".$peer_name."!!,!!local_ready!".$local_ready."!!,!!peer_ready!".$peer_ready."!!"});
}
}
else
{
# Cluster hasn't started.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"});
$anvil->Job->update_progress({progress => 5, message => "job_0277"});
}
if ($waiting)
{
# Not ready yet, pause before checking again.
sleep 10;
}
}
return(0); return(0);
} }
@ -325,6 +360,8 @@ sub wait_on_drbd
{ {
my ($anvil) = @_; my ($anvil) = @_;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0322"});
$anvil->Job->update_progress({progress => 40, message => "job_0322"});
my $short_host_name = $anvil->Get->short_host_name(); my $short_host_name = $anvil->Get->short_host_name();
my $waiting = 1; my $waiting = 1;
while ($waiting) while ($waiting)
@ -358,7 +395,7 @@ sub wait_on_drbd
resource => $server_name, resource => $server_name,
volume => $volume, volume => $volume,
}}); }});
$anvil->Job->update_progress({progress => 30, message => "job_0312,!!peer_host!".$peer_name."!!,!!resource!".$server_name."!!,!!volume!".$volume."!!"}); $anvil->Job->update_progress({progress => 50, message => "job_0312,!!peer_host!".$peer_name."!!,!!resource!".$server_name."!!,!!volume!".$volume."!!"});
} }
} }
} }
@ -371,11 +408,15 @@ sub wait_on_drbd
# All servers should be down now, so stop DRBD. # All servers should be down now, so stop DRBD.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0314"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0314"});
$anvil->Job->update_progress({progress => 50, message => "job_0314"}); $anvil->Job->update_progress({progress => 60, message => "job_0314"});
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." down all"; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." down all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
return(0); return(0);
} }

Loading…
Cancel
Save