# monitor -(status aliases here) Queries the resource for its state.
# meta-data -Dumps the resource agent metadata.
# promote -Turns a resource into the Master role (Master/Slave resources only).
# demote -Turns a resource into the Slave role (Master/Slave resources only).
# migrate_to - migration target
# migrate_from-Implement live migration of resources.
# validate-all-Validates a resource’s configuration.
# help -(usage maps here) Displays a usage message when the resource agent is invoked from the command line, rather than by the cluster manager.
# notify -Inform resource about changes in state of other clones.
if ($conf->{switches}{start})
{
# Start the server
start_server($conf);
}
elsif ($conf->{switches}{stop})
{
# Stop the server
stop_server($conf);
}
elsif (($conf->{switches}{monitor}) or ($conf->{switches}{status}))
{
# Report the status of the server.
server_status($conf);
}
elsif (($conf->{switches}{metadaata}) or ($conf->{switches}{'meta-data'}))
{
show_metadata($conf);
}
elsif ($conf->{switches}{promote})
{
# We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3)
to_log($conf, {message => "We were asked to promote: [".$conf->{environment}{OCF_RESKEY_name}."], which makes no sense and is not supported. Ignoreing.", 'line' => __LINE__, level => 0, priority => "err"});
exit(3);
}
elsif ($conf->{switches}{demote})
{
# We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3)
to_log($conf, {message => "We were asked to demote: [".$conf->{environment}{OCF_RESKEY_name}."], which makes no sense and is not supported. Ignoreing.", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "We were asked to notify, but this is not a promotable (we're stateless) agent. Ignoring.", 'line' => __LINE__, level => 0, priority => "warn"});
exit(3);
}
else
{
# We were called in some unexpected way. Log an error, show usage and exit.
# If the server is already here, we'll do nothing else.
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
if ($return_code)
{
# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
# also start the server on another node, because we don't know the state of it here.
to_log($conf, {message => "It appears that the list the currently running servers returned a non-zero return code: [$return_code]. We will proceed as we may be able to fix this. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The server: [$server] is already on this node in the state: [$state], aborting the start request.", 'line' => __LINE__, level => 2});
# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
# also start the server on another node, because we don't know the state of it here.
to_log($conf, {message => "All tests passed, yet the attempt to boot the server: [$server] exited with a non-zero return code: [$return_code]. The server is in an unknown state, so exiting with a fatal error. Human intervention is now required. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
# also start the server on another node, because we don't know the state of it here.
to_log($conf, {message => "It appears that the call to boot the server: [$server] worked, but the call to list running servers exited with a non-zero return code: [$return_code]. The server is in an unknown state, so exiting with a fatal error. Human intervention is now required. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The server: [$server] has started successfully.", 'line' => __LINE__, level => 2});
exit(0);
}
else
{
# WTF?
to_log($conf, {message => "The server: [$server] should have been started, but it's state is: [$state]. Human intervention is required!", 'line' => __LINE__, level => 1, priority => "err"});
exit(6);
}
last;
}
}
# If we're still alive, then we didn't see the server in the list of running servers, which is really weird.
to_log($conf, {message => "The server: [$server] should have been started, but it wasn't found in the list of running servers.", 'line' => __LINE__, level => 1, priority => "err"});
# Stopping the server is simply a question of "is the server running?" and, if so, stop it.
my $server = $conf->{environment}{OCF_RESKEY_name};
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to list the running servers returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The server: [$server] is running. We will ask it to shut down now.", 'line' => __LINE__, level => 2});
}
elsif ($state eq "paused")
{
# The server is paused. Resume it, wait a few, then proceed with the shutdown.
to_log($conf, {message => "The server: [$server] is paused. Resuming it now so that it can react to the shutdown request.", 'line' => __LINE__, level => 2});
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." resume $server");
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to resume the server: [$server] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
to_log($conf, {message => "Pausing for a moment to give the server time to resume.", 'line' => __LINE__, level => 2});
sleep 3;
}
elsif ($state eq "pmsuspended")
{
# The server is paused. Resume it, wait a few, then proceed with the shutdown.
to_log($conf, {message => "The server: [$server] is asleep. Waking it now so that it can react to the shutdown request.", 'line' => __LINE__, level => 2});
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." dompmwakeup $server");
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to wake the server: [$server] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
to_log($conf, {message => "Pausing for half a minute to give the server time to wake up.", 'line' => __LINE__, level => 2});
to_log($conf, {message => "The server: [$server] is already shutting down. We'll monitor it until it actually shuts off.", 'line' => __LINE__, level => 2});
elsif (($state eq "idle") or ($state eq "crashed"))
{
# The server needs to be destroyed.
to_log($conf, {message => "The server: [$server] is hung. Its state is: [$state]. We will force it off now.", 'line' => __LINE__, level => 2});
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." destroy $server");
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to force-off the server: [$server] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
to_log($conf, {message => "The server: [$server] is now off.", 'line' => __LINE__, level => 2});
exit(0);
}
else
{
# WTF?
to_log($conf, {message => "The server: [$server] is running, but it is in an unexpected state: [$state]. Human intervention is required!", 'line' => __LINE__, level => 1, priority => "err"});
exit(6);
}
last;
}
}
# If we didn't see it, it's off and undefined.
if (not $found)
{
to_log($conf, {message => "The server: [$server] was not listed on this node, so it is not running here.", 'line' => __LINE__, level => 2});
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." shutdown $server");
to_log($conf, {message => "Asking the server: [$server] to shut down now. Please be patient.", 'line' => __LINE__, level => 1});
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to shut down the server: [$server] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
# Now loop until we see the server either vanish from virsh or enter "shut off" state. We wait
# forever and let pacemaker kill us if we time out.
while (1)
{
my $found = 0;
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
if ($return_code)
{
# Looks like virsh isn't running.
to_log($conf, {message => "The attempt to list the running servers returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The environment variable 'OCF_RESKEY_CRM_meta_timeout' was not set, so setting it to: [".$conf->{environment}{OCF_RESKEY_CRM_meta_timeout}."].", 'line' => __LINE__, level => 1, priority => "warn"});
to_log($conf, {message => "The 'virsh' call exited with the return code: [$return_code]. The 'libvirtd' may have failed to start. We won't wait any longer.", 'line' => __LINE__, level => 1, priority => "warn"});
to_log($conf, {message => "The 'virsh' call exited with the return code: [$return_code]. The 'libvirtd' service might be starting, so we will check again shortly.", 'line' => __LINE__, level => 3});
# If I got a non-zero return code, something went wrong with the virsh call.
if ($return_code)
{
to_log($conf, {message => "It would appear that libvirtd is not operating (or not operating correctly). Expected the return code '0' but got: [$return_code].", 'line' => __LINE__, level => 0, priority => "err"});
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
if ($return_code)
{
to_log($conf, {message => "It appears that the call to check if the server: [$server] is on this node returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The server: [$server] state is: [$state]. A server must be 'running' in order to migrate it.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
}
if (not $found)
{
to_log($conf, {message => "The server: [$server] wasn't found on this machine.", 'line' => __LINE__, level => 0, priority => "err"});
if ($conf->{environment}{OCF_RESKEY_CRM_meta_on_node} eq $target)
{
# Yup. All we want to do if make sure it is running here.
to_log($conf, {message => "Verifying that the server: [$server] was successfully migrated here.", 'line' => __LINE__, level => 2});
my ($return_code, $output) = shell_call($conf, $conf->{path}{exe}{virsh}." list");
if ($return_code)
{
# This really shouldn't happen... The migration to here should have failed.
to_log($conf, {message => "While verifying that the server: [$server] migrated here, the attempt to list servers running here returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The migration of the server: [$server] to here was successful!", 'line' => __LINE__, level => 1});
exit(0);
}
}
}
# If we're still alive, we'll proceed as if we're pulling the server to us, and maybe
# that will work.
to_log($conf, {message => "It looks like we were called to verify that the: [$server] migrated here, but it isn't here yet. We'll proceed with an attempt to pull the server over.", 'line' => __LINE__, level => 2});
my ($return_code, $output) = shell_call($conf, $shell_call);
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to enable dual-primary for the resource: [$resource] to the node: [".$conf->{resource}{$resource}{target_name}." (".$conf->{resource}{$resource}{target_node_id}.")] returned a non-zero return code [$return_code]. The returned output (if any) was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
# Disable migration (and any further attempts to enable dual-primary).
my ($return_code, $output) = shell_call($conf, $migration_command);
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to migrate the server: [$server] to the node: [$target] returned a non-zero return code [$return_code]. The returned output (if any) was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The attempt to reset DRBD to config file settings returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
# also start the server on another node, because we don't know the state of it here.
to_log($conf, {message => "It appears that the list the running servers on the migration target: [$target] returned a non-zero return code: [$return_code]. The output, if any, was: [$output].", 'line' => __LINE__, level => 0, priority => "err"});
foreach my $bridge (sort {$a cmp $b} keys %{$conf->{server}{bridges}})
{
if ($conf->{'local'}{bridge}{$bridge})
{
to_log($conf, {message => "The bridge: [$bridge] is available for this server.", 'line' => __LINE__, level => 2});
}
else
{
# Missing bridge.
to_log($conf, {message => "The server wants to connect to the bridge: [$bridge] which we do not have on this node.", 'line' => __LINE__, level => 0, priority => "err"});
exit(5);
}
}
return(0);
}
# This looks up the disks and optical media connected to this server.
sub validate_storage
{
my ($conf) = @_;
# Find the bridge(s) this server uses.
foreach my $device_ref (@{$conf->{server}{definition_xml}->{devices}})
# THis makes sure that the needed backing DRBD devices are on this node. If so, and if they are not up, they
# will be brought up. If that fails, it errors out.
sub validate_storage_drbd
{
my ($conf) = @_;
# Read in the DRBD configuration XML.
my ($return_code, $drbd_body) = shell_call($conf, $conf->{path}{exe}{drbdadm}." dump-xml");
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to read the DRBD configuration returned a non-zero code: [$return_code]. The returned output (if any) was: [$drbd_body].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "Recording the local connection details for the resource: [$resource] -> [$address:$port].", 'line' => __LINE__, level => 2});
if (not $conf->{server}{drbd}{'local'}{device}{$device_path}{lv})
{
# The backing LV doesn't exist.
to_log($conf, {message => "The server wants to use: [$device_path] as a hard drive, but we couldn't find the backing logical volume on this node.", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The server wants to use: [$device_path] as a hard drive, but the backing logical volume: [".$conf->{server}{drbd}{'local'}{device}{$device_path}{lv}."] doesn't exist on this node.", 'line' => __LINE__, level => 0, priority => "err"});
exit(5);
}
else
{
to_log($conf, {message => "The server wants to use: [$device_path] as a hard drive, which is backed by the logical volume: [".$conf->{server}{drbd}{'local'}{device}{$device_path}{lv}."]. Checking that these are ready.", 'line' => __LINE__, level => 1});
($return_code, my $status_json) = shell_call($conf, $conf->{path}{exe}{drbdsetup}." status --json");
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to read the DRBD status returned a non-zero code: [$return_code]. The returned output (if any) was: [$status_json].", 'line' => __LINE__, level => 0, priority => "err"});
# If DRBD is not up, the returned JSON output will not actually exist.
if ($status_json =~ /No currently configured DRBD found/si)
{
to_log($conf, {message => "DRBD is not loaded. Bringing it up now.", 'line' => __LINE__, level => 2});
foreach my $device_path (sort {$a cmp $b} keys %{$conf->{server}{disks}})
{
my $resource = $conf->{device_path}{$device_path}{resource};
to_log($conf, {message => "Bringing up the resource: [$resource] for the server's: [".$device_path."] disk.", 'line' => __LINE__, level => 2});
($return_code, my $drbdadm_output) = shell_call($conf, $conf->{path}{exe}{drbdadm}." up $resource");
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to start the DRBD resource: [$resource] returned a non-zero code: [$return_code]. The returned output (if any) was: [$drbdadm_output].", 'line' => __LINE__, level => 0, priority => "err"});
($return_code, $status_json) = shell_call($conf, $conf->{path}{exe}{drbdsetup}." status --json");
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to read the DRBD status after bringing up the resource(s) for this server returned a non-zero code: [$return_code]. The returned output (if any) was: [$status_json].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# If DRBD is still not up, we're done.
if ($status_json =~ /No currently configured DRBD found/si)
{
to_log($conf, {message => "The attempt to read the DRBD status after bringing up the resource(s) appears to have failed.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
# Process the JSON data. If any disks are not seen, they won't be set to 'ok', which we'll catch next.
my $resource = $conf->{device_path}{$device_path}{resource};
to_log($conf, {message => "The DRBD resource: [$resource] backing the device: [$device_path] was not seen in the 'drbdsetup' status data. Attempting to bringing it up now.", 'line' => __LINE__, level => 2});
($return_code, my $drbdadm_output) = shell_call($conf, $conf->{path}{exe}{drbdadm}." up $resource");
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to start the DRBD resource: [$resource] returned a non-zero code: [$return_code]. The returned output (if any) was: [$drbdadm_output].", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "Pausing briefly to give the resources time to start.", 'line' => __LINE__, level => 2});
sleep 3;
# Check again.
$return_code = undef;
$status_json = undef;
to_log($conf, {message => "Checking the DRBD status again.", 'line' => __LINE__, level => 2});
($return_code, $status_json) = shell_call($conf, $conf->{path}{exe}{drbdsetup}." status --json");
if ($return_code)
{
# Something went wrong.
to_log($conf, {message => "The attempt to read the DRBD status after bringing up the resource(s) for this server returned a non-zero code: [$return_code]. The returned output (if any) was: [$status_json].", 'line' => __LINE__, level => 0, priority => "err"});
foreach my $device_path (sort {$a cmp $b} keys %{$conf->{server}{disks}})
{
if ($conf->{server}{disks}{$device_path} eq "check")
{
# Failed.
my $resource = $conf->{device_path}{$device_path}{resource};
to_log($conf, {message => "The DRBD resource: [$resource] backing the device: [$device_path] was not able to start.", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The DRBD resource: [$resource] volume: [".$device_ref->{volume}."] locat disk state is: [".$device_ref->{'disk-state'}."]. Unsafe to boot the server unless the disk state is UpToDate.", 'line' => __LINE__, level => 0, priority => "err"});
if (lc($connection_ref->{'peer-role'}) eq "primary")
{
# Don't boot here
if ($conf->{switches}{start})
{
to_log($conf, {message => "The DRBD resource: [$resource] on the peer: [".$connection_ref->{name}."] is 'Primary'. Refusing to boot.", 'line' => __LINE__, level => 0, priority => "err"});
if ((lc($volume_ref->{'peer-disk-state'}) ne "uptodate") && (lc($volume_ref->{'peer-disk-state'}) ne "syncsource"))
{
to_log($conf, {message => "The DRBD resource: [$resource] on the peer: [".$connection_ref->{name}."] is not UpToDate (or SyncSource). Refusing to migrate.", 'line' => __LINE__, level => 0, priority => "err"});
# It doesn't exist. Exit with OCF_ERR_INSTALLED (5).
to_log($conf, {message => "The server has the ISO: [$file] mounted in its optical drive, but that file doesn't exist on this system.", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "The server has the ISO: [$file] mounted in its optical drive, which we have, but we can't read it. Check permissions and for SELinux denials.", 'line' => __LINE__, level => 0, priority => "err"});
# It doesn't exist. Exit with OCF_ERR_INSTALLED (5).
to_log($conf, {message => "The server wants to use the emulator: [$emulator] which doesn't exist on this node. Was this server migrated from a different generation Anvil! system? Please update '<emulator>...</emulator>' in the server's definition file: [".$conf->{server}{definition_file}."].", 'line' => __LINE__, level => 0, priority => "err"});
exit(5);
}
if (not -x $emulator)
{
# We can't execute it. Exit with OCF_ERR_PERM (4).
to_log($conf, {message => "The server wants to use the emulator: [$emulator] which exists, but we can't run. Please check permissions and for SELinux denials.", 'line' => __LINE__, level => 0, priority => "err"});
exit(4);
}
return(0);
}
# This makes sure the name we see in the definition file matches what we expect.
sub validate_name
{
my ($conf) = @_;
my $server = $conf->{environment}{OCF_RESKEY_name};
if ($server ne $conf->{server}{definition_xml}->{name}->[0])
{
to_log($conf, {message => "The configured server name: [$server] does not match the name of the server in the definition file: [".$conf->{server}{definition_xml}->{name}."]!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This checks that there is enough RAM to run this server.
sub validate_ram
{
my ($conf) = @_;
# How mcuh RAM does the server need?
my $server_ram_value = $conf->{server}{definition_xml}->{memory}->[0]->{content};
my $server_ram_units = $conf->{server}{definition_xml}->{memory}->[0]->{unit};
# If the file doesn't exist, return OCF_ERR_INSTALLED (5). If the file exists but we can't read it,
# return OCF_ERR_PERM (4).
if (not -e $definition_file)
{
to_log($conf, {message => "The definition file: [$definition_file] for the server: [$server] does not exist here!", 'line' => __LINE__, level => 0, priority => "err"});
exit(5);
}
elsif (not -r $definition_file)
{
to_log($conf, {message => "The definition file: [$definition_file] for the server: [$server] can not be read!", 'line' => __LINE__, level => 0, priority => "err"});
exit(4);
}
# Still alive? Read it in.
my ($definition_xml) = read_file($conf, $definition_file);
# Now die if either number has a non-digit character in it.
if (($whole =~ /\D/) or ($decimal =~ /\D/))
{
to_log($conf, {message => "We were asked to insert commas into a dumber that is not actually a number: [$number]. This is likely a symptom of a larger problem.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
local($_) = $whole ? $whole : "";
1 while s/^(-?\d+)(\d{3})/$1,$2/;
$whole = $_;
my $return = $decimal ? $whole.".".$decimal : $whole;