* Fixed a bug where a stop operation on a server already in shutdown would exit immediately instead of waiting for the server to actually shut off.
Signed-off-by: Digimer <digimer@alteeve.ca>
pcs resource create srv01-c7 ocf:heartbeat:VirtualDomain hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10"
pcs resource create srv01-c7 ocf:alteeve:server hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10" op stop timeout="60" on-fail="block" meta allow-migrate="true" failure-timeout="75"
pcs resource create srv01-c7 ocf:alteeve:server hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10" op on-fail="block" meta allow-migrate="true" failure-timeout="75"
pcs resource create srv01-c7 ocf:alteeve:server name="srv01-c7" meta allow-migrate="true" op monitor interval="10" op stop on-fail="block" meta allow-migrate="true" failure-timeout="75"
* A resource agent receives all configuration information about the resource it manages via environment variables. The names of these environment variables are always the name of the resource parameter, prefixed with OCF_RESKEY_. For example, if the resource has an ip parameter set to 192.168.1.1, then the resource agent will have access to an environment variable OCF_RESKEY_ip holding that value.
* A resource agent receives all configuration information about the resource it manages via environment variables. The names of these environment variables are always the name of the resource parameter, prefixed with OCF_RESKEY_. For example, if the resource has an ip parameter set to 192.168.1.1, then the resource agent will have access to an environment variable OCF_RESKEY_ip holding that value.
*
===
When stopping a server:
14:03 < lge> "on-fail: block"
14:03 < lge> is per operation type.
14:08 < lge> anyways, you can also "on-fail: retry"
OK, set the stop timeout to 60, set 'on-fail: block', set the failure-timeout to 60, and see how pacemaker reacts.
failure-timeout
===
Migrate servers:
- Let ScanCore set 'node-health' attribute (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-node-health)
- Set 'migration-limit' to '1' to enforce serial live migration (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-cluster-options).
Migrate a single server by setting a location constraint against the node we want the VM off of.
to_log($conf, {message => "We were asked to demote: [".$conf->{environment}{OCF_RESKEY_name}."], which makes no sense and is not supported. Ignoreing.", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "We were asked to demote: [".$conf->{environment}{OCF_RESKEY_name}."], which makes no sense and is not supported. Ignoreing.", 'line' => __LINE__, level => 0, priority => "err"});
elsif (($conf->{switches}{help}) or ($conf->{switches}{usage}))
{
{
# Show the usage information
# Show the usage information
show_usage($conf);
show_usage($conf);
@ -369,7 +382,8 @@ sub stop_server
exit(1);
exit(1);
}
}
my $found = 0;
my $shutdown = 1;
my $found = 0;
foreach my $line (split/\n/, $output)
foreach my $line (split/\n/, $output)
{
{
$line =~ s/^\s+//;
$line =~ s/^\s+//;
@ -415,10 +429,16 @@ sub stop_server
to_log($conf, {message => "Pausing for half a minute to give the server time to wake up.", 'line' => __LINE__, level => 2});
to_log($conf, {message => "Pausing for half a minute to give the server time to wake up.", 'line' => __LINE__, level => 2});
sleep 30;
sleep 30;
}
}
elsif (($state eq "in shutdown") or ($state eq "shut off"))
elsif ($state eq "in shutdown")
{
# The server is already shutting down
to_log($conf, {message => "The server: [$server] is already shutting down. We'll monitor it until it actually shuts off.", 'line' => __LINE__, level => 2});
$shutdown = 0;
}
elsif ($state eq "shut off")
{
{
# The server is already shutting down
# The server is already shutting down
to_log($conf, {message => "The server: [$server] is already shutting down.", 'line' => __LINE__, level => 2});
to_log($conf, {message => "The server: [$server] is already off.", 'line' => __LINE__, level => 2});
exit(0);
exit(0);
}
}
elsif (($state eq "idle") or ($state eq "crashed"))
elsif (($state eq "idle") or ($state eq "crashed"))
to_log($conf, {message => "Asking the server: [$server] to shut down now. Please be patient.", 'line' => __LINE__, level => 1});
if ($return_code)
{
{
# Looks like virsh isn't running.
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." shutdown $server");
to_log($conf, {message => "The attempt to shut down the server: [$server] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "Asking the server: [$server] to shut down now. Please be patient.", 'line' => __LINE__, level => 1});
exit(1);
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to shut down the server: [$server] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
}
# Now loop until we see the server either vanish from virsh or enter "shut off" state. We wait
# Now loop until we see the server either vanish from virsh or enter "shut off" state. We wait
@ -538,17 +559,17 @@ sub server_status
my $current_time = time;
my $current_time = time;
my $timeout = $current_time + int(($conf->{environment}{OCF_RESKEY_CRM_meta_timeout} /= 1000) / 2);
my $timeout = $current_time + int(($conf->{environment}{OCF_RESKEY_CRM_meta_timeout} /= 1000) / 2);
to_log($conf, {message => "The 'virsh' call exited with the return code: [$return_code]. The 'libvirtd' service might be starting, so we will check again shortly.", 'line' => __LINE__, level => 2});
to_log($conf, {message => "The 'virsh' call exited with the return code: [$return_code]. The 'libvirtd' service might be starting, so we will check again shortly.", 'line' => __LINE__, level => 3});