Local modifications to ClusterLabs/Anvil by Alteeve
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

2129 lines
88 KiB

#!/usr/bin/perl
#
# This is the resource agent used to manage servers on the Anvil! Intelligent Availability platform.
#
# License: GNU General Public License (GPL) v2+
# (c) 1997-2018 - Alteeve's Niche! Inc.
#
# WARNING: This is a pretty purpose-specific resource agent. No effort was made to test this on an rgmanager
# cluster or on any configuration outside how the Anvil! m3 uses it. If you plan to adapt it to
# another purpose, let us know and we'll try to help.
#
# NOTE: This was initially written with the idea that multiple resources could be used by a single server.
# Now, we use a single resource, named after the server, with 1 or more volumes per resource. As such,
# you will see (for now) an attempt to parse resources, which is not needed and will be removed in
# time.
#
# Based on: https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc
#
# Error types from pacemaker's perspective;
#
# - Soft Error - Unless specifically configured otherwise, pacemaker will attempt to recover a resource
# in-place, usually by restarting the resource on the same node.
# - Hard Error - Unless specifically configured otherwise, pacemaker will attempt to recover a resource
# which failed with this error by restarting the resource on a different node.
# - Fatal Error - This is a cluster-wide error, it would make no sense to recover such a resource on a
# different node, let alone in-place. When a resource fails with this error, Pacemaker will
# attempt to shut down the resource, and wait for administrator intervention.
#
# Exit codes;
# 0 - OCF_SUCCESS
# - The action completed successfully. This is the expected return code for any successful start, stop,
# migrate_to, meta_data, help, and usage action.
# - For monitor, however, a modified convention applies:
# - If the server is running, we return OCF_SUCCESS. If not running and gracefully stopped or migrated
# off, return OCF_NOT_RUNNING.
#
# 1 - OCF_ERR_GENERIC
# - The action returned a generic error. This is used only when none of the more specific error codes,
# defined below, accurately describes the problem.
# - Pacemaker interprets this exit code as a soft error.
#
# 2 - OCF_ERR_ARGS
# - The resource’s configuration is not valid on this machine. This can happen if the server fails to boot
# because of a missing bridge, for example.
#
# 3 - OCF_ERR_UNIMPLEMENTED
# - The resource agent was instructed to execute an action that we do not implement.
# - Not all resource agent actions are mandatory. We don't implement 'promote' or 'demote'. We do implement
# 'migrate_to', 'migrate_from', and 'notify'. If we're misconfigured as a master/slave resource, for
# example, then we will alert the user about this misconfiguration by returning OCF_ERR_UNIMPLEMENTED.
#
# 4 - OCF_ERR_PERM
# - The action failed due to insufficient permissions. This may be due to a node not being able to open a
# definition file or resource config.
# - Pacemaker interprets this exit code as a hard error.
#
# 5 - OCF_ERR_INSTALLED
# - The action failed because a required component is missing on the node where the action was executed.
# This may be due to a required binary not being executable, or the DRBD resource config file not
# existing.
# - Pacemaker interprets this exit code as a hard error.
#
# 6 - OCF_ERR_CONFIGURED
# - The action failed because the user misconfigured the resource in pacemaker. For example, the user may
# have configured an alphanumeric string for a parameter that really should be an integer.
# - Pacemaker interprets this exit code as a fatal error.
#
# 7 - OCF_NOT_RUNNING
# - The resource was found not to be running. This is an exit code that may be returned by the monitor
# action exclusively. Note that this implies that the resource has either gracefully shut down, or has
# never been started.
#
# 8 - OCF_RUNNING_MASTER
# 9 - OCF_FAILED_MASTER
# - These OCF exit codes are not used here.
#
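# NOTE: The original goal was to avoid Anvil::Tools to keep overhead low and to keep this agent as
#       independent as possible. It is, however, loaded below and used for logging, system calls and
#       switch parsing.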
use strict;
use warnings;
use Anvil::Tools;
use XML::Simple;
use JSON;
use Math::BigInt;
use Data::Dumper;
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

# Work out the name of this program and the directory it was called from. When '$0' has no directory
# component (for example, when called as 'perl server'), fall back to the bare program name and to the
# current directory so that neither variable is left undefined for the log calls that use them.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if (not defined $THIS_FILE)
{
	$THIS_FILE = $0;
}
# The program name is quoted so that any regex metacharacters in it are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
# A leading '.' is relative to where we were called from, so swap it for the caller's working directory.
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
#       in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});

### Read or Set the environment variables
# Each variable below is copied into 'environment::<name>'. If it is not set in the environment, an empty
# string is stored so that later reads never see 'undef'. The trailing comments show example values.
foreach my $variable (
	# This is the name of the server we're managing.
	"OCF_RESKEY_name",				# srv01-c7
	# This is our node name
	"OCF_RESKEY_CRM_meta_on_node",			# el8-a01n01.digimer.ca
	# This says "UUID", but it's the node ID.
	"OCF_RESKEY_CRM_meta_on_node_uuid",		# 1
	# This is the timeout for the called action in millisecond.
	"OCF_RESKEY_CRM_meta_timeout",			# 20000
	# If this is set, we'll bump our log level as well.
	"PCMK_debug",					# 0
	# These are other variables that are set, but we don't currently care about them
	"OCF_EXIT_REASON_PREFIX",			# ocf-exit-reason:
	"OCF_RA_VERSION_MAJOR",				# 1
	"OCF_RA_VERSION_MINOR",				# 0
	"OCF_RESKEY_crm_feature_set",			# 3.0.12
	"OCF_RESOURCE_INSTANCE",			# srv01-c7
	"OCF_RESOURCE_PROVIDER",			# alteeve
	"OCF_RESOURCE_TYPE",				# server
	"OCF_ROOT",					# /usr/lib/ocf
	# These are set during a migration
	"OCF_RESKEY_CRM_meta_migrate_source",		# el8-a01n01.digimer.ca
	"OCF_RESKEY_CRM_meta_migrate_target",		# el8-a01n02.digimer.ca
	"OCF_RESKEY_CRM_meta_record_pending",		# true
)
{
	$anvil->data->{environment}{$variable} = defined $ENV{$variable} ? $ENV{$variable} : "";
}

# If pacemaker is in debug, so are we.
# NOTE(review): Any true value enables this, so a string like "no" would also bump the log level - confirm
#               what values pacemaker actually passes here.
if ($anvil->data->{environment}{PCMK_debug})
{
	$anvil->Log->level({set => 3});
}

# Get any command line switches.
$anvil->Get->switches;
### TEST: to be removed later
# Each '--testN' switch overrides the environment (and, for tests 3 and 4, the requested action) with fixed
# values so that the agent can be exercised by hand, outside of pacemaker.

# Test 1; Describe a migration of 'test_server' from node 1 to node 2.
if ($anvil->data->{switches}{test1})
{
	$anvil->data->{environment}{OCF_RESKEY_name}                    = "test_server";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout}        = 20000;
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node}        = "el8-a01n01.digimer.ca";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = "el8-a01n01.digimer.ca";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = "el8-a01n02.digimer.ca";
	print "Running test 1; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n";
}

# Test 2; Describe the reverse migration, from node 2 back to node 1.
if ($anvil->data->{switches}{test2})
{
	$anvil->data->{environment}{OCF_RESKEY_name}                    = "test_server";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout}        = 20000;
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node}        = "el8-a01n02.digimer.ca";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = "el8-a01n02.digimer.ca";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = "el8-a01n01.digimer.ca";
	print "Running test 2; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n";
}

# Test 3; Force the 'start' action for 'test_server' on this node.
if ($anvil->data->{switches}{test3})
{
	$anvil->data->{switches}{start}                          = "#!set!#";
	$anvil->data->{environment}{OCF_RESKEY_name}             = "test_server";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "el8-a01n01.digimer.ca";
	print "Running test 3; Boot: [".$anvil->data->{environment}{OCF_RESKEY_name}."] locally.\n";
}

# Test 4; Force the 'stop' action for 'test_server' on this node.
if ($anvil->data->{switches}{test4})
{
	$anvil->data->{switches}{stop}                           = "#!set!#";
	$anvil->data->{environment}{OCF_RESKEY_name}             = "test_server";
	$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "el8-a01n01.digimer.ca";
	# This message used to say "test 3", which made the output of tests 3 and 4 hard to tell apart.
	print "Running test 4; Shut down: [".$anvil->data->{environment}{OCF_RESKEY_name}."] locally.\n";
}
# Something for the logs
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0298"});

# This is for debugging. The environment is logged at one level for the query-type actions (monitor, status
# and the metadata requests) and at another for everything else.
# NOTE(review): Both levels are currently '3', so the distinction has no effect. It is kept so that either
#               level can be changed on its own while debugging.
{
	my $query_action = 0;
	foreach my $switch ("monitor", "status", "meta-data", "metadaata")
	{
		next if not $anvil->data->{switches}{$switch};
		$query_action = 1;
		last;
	}
	my $log_level = $query_action ? 3 : 3;
	show_environment($anvil, $log_level);
}
### What are we being asked to do?
# start        - Starts the resource.
# stop         - Shuts down the resource.
# monitor      - (status aliases here) Queries the resource for its state.
# meta-data    - Dumps the resource agent metadata.
# promote      - Turns a resource into the Master role (Master/Slave resources only).
# demote       - Turns a resource into the Slave role (Master/Slave resources only).
# migrate_to   - migration target
# migrate_from - Implement live migration of resources.
# validate-all - Validates a resource’s configuration.
# help         - (usage maps here) Displays a usage message when the resource agent is invoked from the
#                command line, rather than by the cluster manager.
# notify       - Inform resource about changes in state of other clones.
if ($anvil->data->{switches}{start})
{
	# Start the server
	start_server($anvil);
}
elsif ($anvil->data->{switches}{stop})
{
	# Stop the server
	stop_server($anvil);
}
elsif (($anvil->data->{switches}{monitor}) or ($anvil->data->{switches}{status}))
{
	# Report the status of the server.
	server_status($anvil);
}
elsif (($anvil->data->{switches}{metadata}) or ($anvil->data->{switches}{metadaata}) or ($anvil->data->{switches}{'meta-data'}))
{
	# Show the resource agent's metadata. The misspelt 'metadaata' switch is still honoured so that any
	# existing caller keeps working; 'metadata' is the correctly spelt alias.
	show_metadata($anvil);
}
elsif ($anvil->data->{switches}{promote})
{
	# We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3)
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0299", variables => { server => $anvil->data->{environment}{OCF_RESKEY_name} }});
	$anvil->nice_exit({exit_code => 3});
}
elsif ($anvil->data->{switches}{demote})
{
	# We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3)
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0300", variables => { server => $anvil->data->{environment}{OCF_RESKEY_name} }});
	$anvil->nice_exit({exit_code => 3});
}
elsif (($anvil->data->{switches}{migrate_to}) or ($anvil->data->{switches}{migrate_from}))
{
	# Migrate the server. The same function handles both pushing ('migrate_to') and pulling
	# ('migrate_from').
	migrate_server($anvil);
}
elsif ($anvil->data->{switches}{'validate-all'})
{
	# Validate our local config and setup.
	validate_all($anvil);
	$anvil->nice_exit({exit_code => 0});
}
elsif (($anvil->data->{switches}{help}) or ($anvil->data->{switches}{usage}))
{
	# Show the usage information
	show_usage($anvil);
	$anvil->nice_exit({exit_code => 0});
}
elsif ($anvil->data->{switches}{notify})
{
	# We don't implement this, so we return OCF_ERR_UNIMPLEMENTED (3)
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0301"});
	$anvil->nice_exit({exit_code => 3});
}
else
{
	# We were called in some unexpected way. Log the environment and an error, then exit with
	# OCF_ERR_GENERIC (1).
	show_environment($anvil, 3);
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0302"});
	$anvil->nice_exit({exit_code => 1});
}

# If we hit here, something very wrong happened.
$anvil->nice_exit({exit_code => 255});
#############################################################################################################
# Functions #
#############################################################################################################
=cut
STATES
The State field lists what state each domain is currently in. A domain can be in one of the following
possible states:
running - The domain is currently running on a CPU
idle - The domain is idle, and not running or runnable. This can be caused because the domain is
waiting on IO (a traditional wait state) or has gone to sleep because there was nothing else
for it to do.
paused - The domain has been paused, usually occurring through the administrator running virsh suspend.
When in a paused state the domain will still consume allocated resources like memory, but will
not be eligible for scheduling by the hypervisor.
in shutdown - The domain is in the process of shutting down, i.e. the guest operating system has been
notified and should be in the process of stopping its operations gracefully.
shut off - The domain is not running. Usually this indicates the domain has been shut down completely, or
has not been started.
crashed - The domain has crashed, which is always a violent ending. Usually this state can only occur if
the domain has been configured not to restart on crash.
pmsuspended - The domain has been suspended by guest power management, e.g. entered into s3 state.
=cut
# This boots the server if possible.
#
# Parameters;
#   $anvil - The Anvil::Tools handle holding 'environment::OCF_RESKEY_name' (the server to boot).
#
# Every path through this function ends in a call to 'nice_exit' with one of these exit codes;
#   0 - The server was already listed by 'virsh list' in a state other than 'shut off', or it was booted
#       and is now 'running'.
#   1 - The 'virsh create' call succeeded, but the server was not in the 'virsh list' output afterwards.
#   6 - A virsh call failed, or the server was in an unexpected state after booting.
sub start_server
{
	my ($anvil) = @_;

	# Start procedure;
	# 1.   Read the XML definition file and find the backing storage and bridges. Soft error if read fails.
	# 2.   Make sure the name matches.
	# 3.   Make sure we have enough free RAM.
	# 4.   Make sure the emulator exists (can be an issue after migrating from an different gen Anvil!).
	# 5.1. Make sure optical drives with mounted data have the disk present. Soft error if not.
	# 5.2. Find any backing DRBD devices
	# 6.   For each DRBD device;
	# 6.1. Make sure the backing LV is ACTIVE. Soft error if not.
	# 6.2. Check if the drbd resource is up. If not, up it.
	# 6.3. Make sure the backing disk is UpToDate. Soft error if not.
	# 6.4. Make sure the backing device is 'Connected' or 'Connecting'. Call a connect if not.
	# 7.   Make sure all bridges exist and soft error if not.
	# 8.   Start the server.
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0303", variables => { server => $server }});

	# If the server is already here, we'll do nothing else.
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0304", variables => { return_code => $return_code, output => $output }});
		$anvil->nice_exit({exit_code => 6});
	}
	foreach my $line (split/\n/, $output)
	{
		# Trim the line and collapse runs of white space so the '<id> <name> <state>' match is simple.
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;

		# The server name is quoted so that characters like '.' in it are matched literally.
		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			my $state = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				server  => $server,
				'state' => $state,
			}});

			# Make sure the server is shut off, if it is listed at all. Any other state means it
			# is already on this node in some form, so we must not try to boot it again. (virsh
			# reports this state as 'shut off', see the list of states above.)
			if ($state ne "shut off")
			{
				# Abort
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0306", variables => { server => $server, 'state' => $state }});
				$anvil->nice_exit({exit_code => 0});
			}
			last;
		}
	}

	# We need to boot, validate everything.
	validate_all($anvil);

	# If we're still alive, we're ready to boot.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0305", variables => { server => $server }});
	my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { definition_file => $definition_file }});

	($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." create $definition_file"});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0307", variables => {
			server      => $server,
			return_code => $return_code,
			output      => $output,
		}});
		$anvil->nice_exit({exit_code => 6});
	}

	# Verify that it started. We give the hypervisor a moment before asking.
	sleep 2;
	($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0308", variables => {
			server      => $server,
			return_code => $return_code,
			output      => $output,
		}});
		$anvil->nice_exit({exit_code => 6});
	}
	foreach my $line (split/\n/, $output)
	{
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;

		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			my $state = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				server  => $server,
				'state' => $state,
			}});

			if ($state eq "running")
			{
				# Success!
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0309", variables => { server => $server }});
				$anvil->nice_exit({exit_code => 0});
			}
			else
			{
				# The server is listed, but it isn't running. That should not happen right
				# after a successful 'virsh create'.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0310", variables => { server => $server, 'state' => $state }});
				$anvil->nice_exit({exit_code => 6});
			}
			last;
		}
	}

	# If we're still alive, then we didn't see the server in the list of running servers, which is really weird.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0311", variables => { server => $server }});
	$anvil->nice_exit({exit_code => 1});
}
# This shuts down the server if possible.
#
# Parameters;
#   $anvil - The Anvil::Tools handle holding 'environment::OCF_RESKEY_name' (the server to stop).
#
# Every path through this function ends in a call to 'nice_exit' with one of these exit codes;
#   0 - The server is off; it was already 'shut off' or not listed, it was destroyed, or it shut down
#       while we waited.
#   1 - A virsh call failed.
#   6 - The server was in a state we don't recognise.
sub stop_server
{
	my ($anvil) = @_;

	# Stopping the server is simply a question of "is the server running?" and, if so, stop it. Once
	# stopped, we stop the DRBD resource on both nodes.
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
	if ($return_code)
	{
		# Looks like virsh isn't running.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0312", variables => { return_code => $return_code, output => $output }});
		$anvil->nice_exit({exit_code => 1});
	}

	# 'shutdown' is cleared if the server is already shutting down, so that we only wait for it. 'found'
	# is set if the server is in the 'virsh list' output at all.
	my $shutdown = 1;
	my $found    = 0;
	foreach my $line (split/\n/, $output)
	{
		# Trim the line and collapse runs of white space so the '<id> <name> <state>' match is simple.
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;

		# The server name is quoted so that characters like '.' in it are matched literally.
		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			my $state = $2;
			   $found = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				server  => $server,
				'state' => $state,
			}});

			if ($state eq "running")
			{
				# The server is running, shut it down.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0313", variables => { server => $server }});
			}
			elsif ($state eq "paused")
			{
				# The server is paused. Resume it, wait a few, then proceed with the shutdown.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0314", variables => { server => $server }});
				my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." resume $server"});
				if ($return_code)
				{
					# Looks like virsh isn't running.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0315", variables => {
						server      => $server,
						return_code => $return_code,
						output      => $output,
					}});
					$anvil->nice_exit({exit_code => 1});
				}
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0316"});
				sleep 3;
			}
			elsif ($state eq "pmsuspended")
			{
				# The server was suspended by guest power management. Wake it up, give it time
				# to come back, then proceed with the shutdown.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0317", variables => { server => $server }});
				my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." dompmwakeup $server"});
				if ($return_code)
				{
					# Looks like virsh isn't running.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0318", variables => {
						server      => $server,
						return_code => $return_code,
						output      => $output,
					}});
					$anvil->nice_exit({exit_code => 1});
				}
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0319"});
				sleep 30;
			}
			elsif ($state eq "in shutdown")
			{
				# The server is already shutting down, so we only need to wait for it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0320", variables => { server => $server }});
				$shutdown = 0;
			}
			elsif ($state eq "shut off")
			{
				# The server is already off, there is nothing to do.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0321", variables => { server => $server }});
				$anvil->nice_exit({exit_code => 0});
			}
			elsif (($state eq "idle") or ($state eq "crashed"))
			{
				# The server needs to be destroyed.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0322", variables => {
					server  => $server,
					'state' => $state,
				}});
				my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." destroy $server"});
				if ($return_code)
				{
					# Looks like virsh isn't running.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0323", variables => {
						server      => $server,
						return_code => $return_code,
						output      => $output,
					}});
					$anvil->nice_exit({exit_code => 1});
				}
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }});
				$anvil->nice_exit({exit_code => 0});
			}
			else
			{
				# This is a state we don't know how to handle.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0325", variables => {
					server  => $server,
					'state' => $state,
				}});
				$anvil->nice_exit({exit_code => 6});
			}
			last;
		}
	}

	# If we didn't see it, it's off and undefined.
	if (not $found)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0326", variables => { server => $server }});
		$anvil->nice_exit({exit_code => 0});
	}

	# If we're alive, it is time to stop the server
	if ($shutdown)
	{
		my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." shutdown $server"});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0327", variables => { server => $server }});
		if ($return_code)
		{
			# Looks like virsh isn't running.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0328", variables => {
				server      => $server,
				return_code => $return_code,
				output      => $output,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
	}

	# Now loop until we see the server either vanish from virsh or enter "shut off" state. We wait
	# forever and let pacemaker kill us if we time out.
	while (1)
	{
		my $found = 0;
		my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
		if ($return_code)
		{
			# Looks like virsh isn't running.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0312", variables => {
				server      => $server,
				return_code => $return_code,
				output      => $output,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		foreach my $line (split/\n/, $output)
		{
			$line =~ s/^\s+//;
			$line =~ s/\s+$//;
			$line =~ s/\s+/ /g;

			if ($line =~ /^(\d+) \Q$server\E (.*)$/)
			{
				my $state = $2;
				   $found = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					server  => $server,
					'state' => $state,
				}});

				if ($state eq "shut off")
				{
					# We're down.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }});
					$anvil->nice_exit({exit_code => 0});
				}
				last;
			}
		}

		# If we didn't find the server, it's off and undefined now.
		if (not $found)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0329", variables => { server => $server }});

			# Stop DRBD resources now. We don't worry if it actually stops or not (let ScanCore
			# handle that). We only care that the server has stopped.
			read_server_definition($anvil);
			validate_storage($anvil);
			foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
			{
				my $resource = $anvil->data->{device_path}{$device_path}{resource};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
					device_path => $device_path,
					resource    => $resource,
				}});

				# 'drbd::stopped::<resource>' records what we've already taken down, so a
				# resource backing several disks is only stopped once.
				if ((not exists $anvil->data->{drbd}{stopped}{$resource}) or (not $anvil->data->{drbd}{stopped}{$resource}))
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0408", variables => {
						resource    => $resource,
						device_path => $device_path,
					}});
					manage_drbd_resource($anvil, "down", $resource);
					$anvil->data->{drbd}{stopped}{$resource} = 1;
				}
				else
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0409", variables => {
						resource    => $resource,
						device_path => $device_path,
					}});
				}
			}
			$anvil->nice_exit({exit_code => 0});
		}

		# Still here, so the server hasn't stopped yet. Wait a bit and check again.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0330", variables => { server => $server }});
		sleep 5;
	}

	$anvil->nice_exit({exit_code => 0});
}
# This checks the status of the server.
#
# Parameters;
#   $anvil - The Anvil::Tools handle holding 'environment::OCF_RESKEY_name' (the server to check) and
#            'environment::OCF_RESKEY_CRM_meta_timeout' (the action timeout, in milliseconds).
#
# Every path through this function ends in a call to 'nice_exit' with one of these exit codes;
#   0 - OCF_SUCCESS; The server is 'running', 'paused', 'pmsuspended' or 'in shutdown'.
#   1 - OCF_ERR_GENERIC; The virsh call kept failing, or the server is 'idle', 'crashed' or in a state we
#       don't recognise.
#   7 - OCF_NOT_RUNNING; The server is 'shut off' or is not listed at all.
sub server_status
{
	my ($anvil) = @_;

	# If the named server is running, return OCF_SUCCESS (0), otherwise OCF_NOT_RUNNING (7). If the
	# server is failed, return OCF_ERR_GENERIC (1).
	my $state  = "";
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};

	### NOTE: When pacemaker is first starting, virsh won't be up right away. So if we get a non-zero
	###       return code, we'll try again up to 50% of 'environment::OCF_RESKEY_CRM_meta_timeout'.
	if (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout})
	{
		# Set a sane default of 20 seconds.
		$anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} = 20000;
		# NOTE(review): The variable is passed as 'logout', which looks like it was meant to be
		#               'timeout'. It is left alone as it must match the 'log_0331' string.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "warn", key => "log_0331", variables => { logout => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} }});
	}

	# The timeout is in milliseconds, so convert it to seconds and halve it to get our deadline. The
	# stored timeout itself is read, not changed.
	my $return_code  = undef;
	my $output       = "";
	my $current_time = time;
	my $timeout      = $current_time + int(($anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} / 1000) / 2);
	my $waiting      = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		current_time => $current_time,
		timeout      => $timeout,
	}});
	while($waiting)
	{
		# Make the call
		($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
		if (not $return_code)
		{
			# The call worked, we're done waiting.
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { waiting => $waiting }});
		}
		elsif (time > $timeout)
		{
			# We've waited long enough.
			$waiting = 0;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "warn", key => "log_0332", variables => { return_code => $return_code }});
		}
		else
		{
			# The call failed, but we still have time. Wait a moment and try again.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0333", variables => { return_code => $return_code }});
			sleep 2;
		}
	}

	# If I got a non-zero return code, something went wrong with the virsh call.
	if ($return_code)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0334", variables => { return_code => $return_code }});
		if ($output)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0335", variables => {
				command => $anvil->data->{path}{exe}{virsh},
				output  => $output,
			}});
		}
		$anvil->nice_exit({exit_code => 1});
	}

	# If we're still alive, process the output
	foreach my $line (split/\n/, $output)
	{
		# Trim the line and collapse runs of white space so the '<id> <name> <state>' match is simple.
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;

		# The server name is quoted so that characters like '.' in it are matched literally.
		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			$state = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				server  => $server,
				'state' => $state,
			}});
			last;
		}
	}

	# If there is a state, see what the state is.
	if ($state)
	{
		# What is the state?
		# (See the comment below the 'FUNCTIONS' divider above the first function for a full list of states.)
		if (($state eq "running") or ($state eq "paused") or ($state eq "pmsuspended") or ($state eq "in shutdown"))
		{
			# The server is here, so we report OCF_SUCCESS.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0336", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 0});
		}
		elsif ($state eq "shut off")
		{
			# The server is cleanly off, so exit with OCF_NOT_RUNNING. This must be the literal '7';
			# '$7' would be the (undefined) seventh regex capture.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0337", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 7});
		}
		elsif (($state eq "idle") or ($state eq "crashed"))
		{
			# The server has failed, so exit with OCF_ERR_GENERIC.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0338", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		else
		{
			# This is a state we don't recognise.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0339", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
	}
	else
	{
		# Not running. Exit with OCF_NOT_RUNNING
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0340", variables => { server => $server }});
		$anvil->nice_exit({exit_code => 7});
	}

	$anvil->nice_exit({exit_code => 0});
}
# Migrate the server, either pushing it to the peer ('migrate_to') or pulling it to this node
# ('migrate_from').
#
# Parameters;
# - $anvil - The handle used here for logging, stored data/switches and system calls.
#
# Flow;
# 1. Sanity check the server (and storage/bridges as appropriate) and craft the 'virsh' migration and
#    verification commands.
# 2. Enable dual-primary on each DRBD resource that has a 'target_node_id'.
# 3. Run the migration, if dual-primary was enabled without error.
# 4. Always run 'drbdadm adjust all' to switch dual-primary back off.
# 5. Run the verification command and report the result.
#
# Every path through this function ends in a call to '$anvil->nice_exit()'; nothing is returned.
sub migrate_server
{
	my ($anvil) = @_;
	
	# If we were given 'migrate_to', we need to make sure the storage is UpToDate on the peer for all 
	# backing resources. We can't check the target's bridges, but the migration will fail if one is 
	# missing.
	# If we're given 'migrate_from', we're pulling the server towards us, so we can check both bridges 
	# and storage.
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	my $source = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source};
	my $target = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target};
	
	# This matches the server's line in (whitespace-normalised) 'virsh list' output. The name is quoted 
	# so that characters like '.' or '+' in a server name are matched literally instead of being treated
	# as regex metacharacters. Capture 2 is the server's state.
	my $server_regex = qr/^(\d+) \Q$server\E (.*)$/;
	
	# The actual migration command will involve enabling dual primary, then beginning the migration. The
	# virsh call will depend on if we're pushing or pulling. Once the migration completes, regardless of
	# success or failure, dual primary will be disabled again.
	my $migration_command = "";
	my $verify_command    = "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		server => $server,
		source => $source,
		target => $target,
	}});
	if ($anvil->data->{switches}{migrate_to})
	{
		# NOTE(review): 'log_0337' is also used by the status check for a 'shut off' server, where it
		#               is passed 'server' and 'state'. Confirm this is the intended key here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0337", variables => { 
			server => $server,
			target => $target,
		}});
		
		# Is the server even here?
		my $found = 0;
		my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
		if ($return_code)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0342", variables => { 
				server      => $server,
				return_code => $return_code,
				output      => $output,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		foreach my $line (split/\n/, $output)
		{
			$line =~ s/^\s+//;
			$line =~ s/\s+$//;
			$line =~ s/\s+/ /g;
			if ($line =~ $server_regex)
			{
				my $state = $2;
				   $found = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					server  => $server,
					'state' => $state,
					found   => $found,
				}});
				
				# We can only migrate if it is running.
				if (lc($state) ne "running")
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0343", variables => { 
						server  => $server,
						'state' => $state,
					}});
					$anvil->nice_exit({exit_code => 1});
				}
			}
		}
		if (not $found)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0344", variables => { server => $server }});
			$anvil->nice_exit({exit_code => 1});
		}
		
		read_server_definition($anvil);
		validate_storage($anvil);
		
		# If we're alive, craft the migration command.
		$migration_command = $anvil->data->{path}{exe}{virsh}." migrate --undefinesource --live ".$server." qemu+ssh://".$target."/system";
		$verify_command    = $anvil->data->{path}{exe}{virsh}." list";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			migration_command => $migration_command,
			verify_command    => $verify_command,
		}});
	}
	elsif ($anvil->data->{switches}{migrate_from})
	{
		# This is called after a migration. In case this is the case here, the target will be us.
		# Just make sure it is running and, if so, return '0'.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			"environment::OCF_RESKEY_CRM_meta_on_node" => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node},
			target                                     => $target,
		}});
		if ($anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} eq $target)
		{
			# Yup. All we want to do is make sure it is running here.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0345", variables => { server => $server }});
			my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
			if ($return_code)
			{
				# This really shouldn't happen... The migration to here should have failed.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0346", variables => { 
					server      => $server,
					return_code => $return_code,
					output      => $output,
				}});
				$anvil->nice_exit({exit_code => 1});
			}
			foreach my $line (split/\n/, $output)
			{
				$line =~ s/^\s+//;
				$line =~ s/\s+$//;
				$line =~ s/\s+/ /g;
				if ($line =~ $server_regex)
				{
					my $state = $2;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						server  => $server,
						'state' => $state,
					}});
					if ($state eq "running")
					{
						# Success!
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0347", variables => { server => $server }});
						$anvil->nice_exit({exit_code => 0});
					}
				}
			}
			
			# If we're still alive, we'll proceed as if we're pulling the server to us, and maybe
			# that will work.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0348", variables => { server => $server }});
		}
		
		# Validate everything, as if we were about to boot
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0349", variables => { 
			server => $server,
			target => $target,
		}});
		validate_all($anvil);
		
		# If we're alive, craft the migration command.
		$migration_command = $anvil->data->{path}{exe}{virsh}." -c qemu+ssh://root\@".$source."/system migrate --undefinesource --live ".$server." qemu+ssh://".$target."/system";
		$verify_command    = $anvil->data->{path}{exe}{virsh}." -c qemu+ssh://root\@".$source."/system list";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			migration_command => $migration_command,
			verify_command    => $verify_command,
		}});
	}
	
	# Enable dual-primary. If this fails, we will disable (or try to) and then abort.
	my $migrate = 1;
	foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{resource}})
	{
		next if not defined $anvil->data->{resource}{$resource}{target_node_id};
		next if not $migrate;
		my $shell_call = $anvil->data->{path}{exe}{drbdsetup}." net-options ".$resource." ".$anvil->data->{resource}{$resource}{target_node_id}." --allow-two-primaries=yes";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0350", variables => { 
			resource       => $resource,
			target_name    => $anvil->data->{resource}{$resource}{target_name},
			target_node_id => $anvil->data->{resource}{$resource}{target_node_id},
		}});
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		if ($return_code)
		{
			# Something went wrong.
			# NOTE(review): 'log_0346' is also used for the 'virsh list' failure in the 
			#               'migrate_from' branch, with different variables. Confirm this is the
			#               intended key here.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0346", variables => { 
				resource       => $resource,
				target_name    => $anvil->data->{resource}{$resource}{target_name},
				target_node_id => $anvil->data->{resource}{$resource}{target_node_id},
				return_code    => $return_code,
				output         => $output,
			}});
			
			# Disable migration (and any further attempts to enable dual-primary).
			$migrate = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrate => $migrate }});
		}
	}
	
	my $migrated = 0;
	if ($migrate)
	{
		# Call the migration.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0352", variables => { 
			server => $server,
			target => $target,
		}});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migration_command => $migration_command }});
		my ($output, $return_code) = $anvil->System->call({shell_call => $migration_command});
		if ($return_code)
		{
			# Something went wrong. We don't exit yet; dual-primary must be switched off first.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0353", variables => { 
				server      => $server,
				target      => $target,
				return_code => $return_code,
				output      => $output,
			}});
		}
		else
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0354"});
			
			$migrated = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrated => $migrated }});
		}
	}
	
	# Switch off dual-primary. This runs regardless of whether the migration was attempted or worked.
	my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust all";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0355"});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	if ($return_code)
	{
		# Something went wrong.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0356", variables => { 
			return_code => $return_code,
			output      => $output,
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Did something go wrong during the dual-primary enable or the actual migration call?
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		migrate  => $migrate,
		migrated => $migrated,
	}});
	if ((not $migrate) or (not $migrated))
	{
		# Exit
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0357"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Last, verify that the server is now on the target.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { verify_command => $verify_command }});
	($output, $return_code) = $anvil->System->call({shell_call => $verify_command});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0358", variables => { 
			target      => $target,
			return_code => $return_code,
			output      => $output,
		}});
		$anvil->nice_exit({exit_code => 6});
	}
	foreach my $line (split/\n/, $output)
	{
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;
		if ($line =~ $server_regex)
		{
			my $state = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				server  => $server,
				'state' => $state,
			}});
			if ($state eq "running")
			{
				# Success!
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0359", variables => { 
					server => $server,
					target => $target,
				}});
				$anvil->nice_exit({exit_code => 0});
			}
		}
	}
	
	# If we made it here, we succeeded.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0360"});
	$anvil->nice_exit({exit_code => 0});
}
# Runs each validation step in turn (definition XML, server name, emulator, RAM when booting, bridges and
# storage), logging a progress message before the first step and after each one. Returns '0'.
#
# Parameters;
# - $anvil - The handle used here for logging and for reading the command line switches.
sub validate_all
{
	my $anvil = shift;
	
	# Small wrapper so each progress message below stays on one short line. The caller passes its own
	# '__LINE__' so the logged line number still points at the call site.
	my $progress = sub
	{
		my ($key, $line) = @_;
		$anvil->Log->entry({source => $THIS_FILE, line => $line, 'print' => 1, level => 2, key => $key});
	};
	
	$progress->("log_0361", __LINE__);
	
	# Load the server's definition file (if found and readable).
	read_server_definition($anvil);
	$progress->("log_0362", __LINE__);
	
	# Check that the name inside the definition matches.
	validate_name($anvil);
	$progress->("log_0363", __LINE__);
	
	# Check that the emulator the server asks for is the one we have.
	validate_emulator($anvil);
	$progress->("log_0364", __LINE__);
	
	# The RAM check only matters when the server is about to run here.
	my $about_to_boot = (($anvil->data->{switches}{start}) or ($anvil->data->{switches}{migrate_from}));
	if ($about_to_boot)
	{
		validate_ram($anvil);
		$progress->("log_0365", __LINE__);
	}
	
	# Bridges next.
	validate_bridges($anvil);
	$progress->("log_0366", __LINE__);
	
	# Finally, storage (disks and optical media).
	validate_storage($anvil);
	$progress->("log_0367", __LINE__);
	
	return(0);
}
# This ensures that the bridges the server connects to exist on this node.
#
# Parameters;
# - $anvil - The handle used here for logging, stored data and system calls.
#
# The bridges named in the server's definition are recorded under 'server::bridges::<name>', and the
# bridges reported by 'bridge -j link show' are recorded under 'local::bridge::<name>'. If the bridge list
# can't be read or parsed, this exits with '1'. If a bridge the server needs is missing, this exits with
# '5'. Otherwise, '0' is returned.
sub validate_bridges
{
	my ($anvil) = @_;
	
	# Find the bridges that the server's interfaces connect to.
	foreach my $device_ref (@{$anvil->data->{server}{definition_xml}->{devices}})
	{
		foreach my $interface_ref (@{$device_ref->{interface}})
		{
			foreach my $source_ref (@{$interface_ref->{source}})
			{
				# A source without a 'bridge' attribute isn't a bridge connection. Skip it 
				# rather than recording an empty bridge name.
				my $bridge = $source_ref->{bridge};
				next if not defined $bridge;
				
				$anvil->data->{server}{bridges}{$bridge} = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "server::bridges::${bridge}" => $anvil->data->{server}{bridges}{$bridge} }});
			}
		}
	}
	
	# Get a list of available bridges. We pick up interfaces and MTU data as well, though there really 
	# isn't any use for it at this time.
	my $shell_call              = $anvil->data->{path}{exe}{bridge}." -j link show";
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		output      => $output,
		return_code => $return_code,
	}});
	if ($return_code)
	{
		# Without the bridge list, we can't validate anything.
		my $error = "[ Error ] - The call: [".$shell_call."] failed with the return code: [".$return_code."]. The output, if any, was: [".(defined $output ? $output : "")."]\n";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# No output at all is treated as "no bridge ports found". Anything else must parse as a JSON array.
	my $bridge_data = [];
	if ((defined $output) and ($output =~ /\S/))
	{
		my $json   = JSON->new->allow_nonref;
		my $parsed = eval { $bridge_data = $json->decode($output); 1; };
		if ((not $parsed) or (ref($bridge_data) ne "ARRAY"))
		{
			my $problem = $parsed ? "The decoded data was not an array." : $@;
			chomp $problem;
			my $error =  "[ Error ] - There was a problem parsing the output of: [".$shell_call."]. The output was: [".$output."]. The error was:\n";
			   $error .= "===========================================================\n";
			   $error .= $problem."\n";
			   $error .= "===========================================================\n";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
			$anvil->nice_exit({exit_code => 1});
		}
	}
	foreach my $hash_ref (@{$bridge_data})
	{
		next if ref($hash_ref) ne "HASH";
		
		# Links with no 'master' aren't attached to a bridge, so there is nothing to record.
		my $bridge    = $hash_ref->{master};
		my $interface = $hash_ref->{ifname};
		my $mtu       = $hash_ref->{mtu};
		next if not defined $bridge;
		
		if (defined $interface)
		{
			$anvil->data->{bridge}{$bridge}{interface}{$interface}{mtu} = $mtu;
		}
		$anvil->data->{'local'}{bridge}{$bridge} = 1;
	}
	
	# Verify bridges now
	foreach my $bridge (sort {$a cmp $b} keys %{$anvil->data->{server}{bridges}})
	{
		if ($anvil->data->{'local'}{bridge}{$bridge})
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0368", variables => { bridge => $bridge }});
		}
		else
		{
			# Missing bridge.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0369", variables => { bridge => $bridge }});
			$anvil->nice_exit({exit_code => 5});
		}
	}
	
	return(0);
}
# Records the disks and optical media found in the server's definition, then hands off to the optical
# media check (skipped when migrating away or stopping) and the DRBD check. Returns '0'.
#
# Parameters;
# - $anvil - The handle used here for logging, stored data and the command line switches.
#
# Each 'disk' source's 'dev' is stored as 'server::disks::<dev>' with the value 'check'. Each 'cdrom'
# source's 'file' is stored as 'server::optical::<file>' with the value '1'.
sub validate_storage
{
	my $anvil = shift;
	
	# Walk every disk entry under every devices entry in the parsed definition.
	foreach my $devices (@{$anvil->data->{server}{definition_xml}->{devices}})
	{
		foreach my $disk (@{$devices->{disk}})
		{
			my $kind = $disk->{device};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { type => $kind }});
			
			# Anything other than a disk or a cdrom is ignored.
			my $is_disk = ($kind eq "disk");
			next if ((not $is_disk) and ($kind ne "cdrom"));
			
			foreach my $source (@{$disk->{source}})
			{
				if ($is_disk)
				{
					# Mark the device as needing to be checked.
					my $dev = $source->{dev};
					$anvil->data->{server}{disks}{$dev} = "check";
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "server::disks::${dev}" => $anvil->data->{server}{disks}{$dev} }});
				}
				else
				{
					# Note the file backing this optical drive.
					my $iso = $source->{file};
					$anvil->data->{server}{optical}{$iso} = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "server::optical::${iso}" => $anvil->data->{server}{optical}{$iso} }});
				}
			}
		}
	}
	
	# Optical media is not checked when the server is being migrated off of us or stopped.
	my $leaving = (($anvil->data->{switches}{migrate_to}) or ($anvil->data->{switches}{stop}));
	validate_storage_optical($anvil) if not $leaving;
	
	# The DRBD devices are always checked.
	validate_storage_drbd($anvil);
	
	return(0);
}
# This makes sure that the needed backing DRBD devices are on this node. If so, and if they are not up, they
# will be brought up. If that fails, it errors out.
sub validate_storage_drbd
{
my ($anvil) = @_;
# Read in the DRBD configuration XML.
my ($drbd_body, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." dump-xml"});
if ($return_code)
{
# Something went wrong.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0370", variables => {
return_code => $return_code,
drbd_body => $drbd_body,
}});
$anvil->nice_exit({exit_code => 1});
}
# Parse the XML
my $drbd_xml = "";
my $xml = XML::Simple->new();
eval { $drbd_xml = $xml->XMLin($drbd_body, KeyAttr => ["name", "vnr"], ForceArray => 1) };
if ($@)
{
chomp $@;
my $error = "[ Error ] - The was a problem parsing: [$drbd_body]. The error was:\n";
$error .= "===========================================================\n";
$error .= $@."\n";
$error .= "===========================================================\n";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
$anvil->nice_exit({exit_code => 1});
}
foreach my $resource (sort {$a cmp $b} keys %{$drbd_xml->{resource}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
# Figure out who I am and who my peer is, ignoring DR host(s).
my $peer = "";
my $local = "";
foreach my $connection_ref (@{$drbd_xml->{resource}->{$resource}->{connection}})
{
my $protocol = $connection_ref->{section}->{net}->{option}->{protocol}->{value};
my $fencing = $connection_ref->{section}->{net}->{option}->{fencing}->{value};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
protocol => $protocol,
fencing => $fencing,
}});
# If this isn't set to 'resource-and-stonith', it's a DR connection and we'll ignore
# it.
next if $fencing ne "resource-and-stonith";
# Look at the hosts
foreach my $host (sort {$a cmp $b} keys %{$connection_ref->{host}})
{
my $address = $connection_ref->{host}->{$host}->{address}->[0]->{content};
my $port = $connection_ref->{host}->{$host}->{address}->[0]->{port};
my $short_hostname = $host;
$short_hostname =~ s/\..*$//;
my $local_hostname = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
host => $host,
short_hostname => $short_hostname,
address => $address,
port => $port,
local_hostname => $local_hostname,
}});
# Is this me or the peer?
if (($local_hostname eq $short_hostname) or ($local_hostname =~ /^$short_hostname\./))
{
# This is us.
$local = $host;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0371", variables => {
resource => $resource,
address => $address,
port => $port,
}});
$anvil->data->{server}{drbd}{'local'}{hostname} = $host,
$anvil->data->{server}{drbd}{'local'}{short_hostname} = $short_hostname,
$anvil->data->{server}{drbd}{'local'}{address} = $address,
$anvil->data->{server}{drbd}{'local'}{port} = $port,
# Record my node name for this resource (to be paired with the node
# ID when migrating)
$anvil->data->{resource}{$resource}{local_node_name} = $host;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "resource::${resource}::local_node_name" => $anvil->data->{resource}{$resource}{local_node_name} }});
}
else
{
# This is our peer
$peer = $host;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0372", variables => {
resource => $resource,
address => $address,
port => $port,
}});
$anvil->data->{server}{drbd}{peer}{hostname} = $host,
$anvil->data->{server}{drbd}{peer}{short_hostname} = $short_hostname,
$anvil->data->{server}{drbd}{peer}{address} = $address,
$anvil->data->{server}{drbd}{peer}{port} = $port,
}
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
'local' => $local,
peer => $peer,
}});
foreach my $volume (sort {$a cmp $b} keys %{$drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}})
{
# The backing device is the logical volume underpinning this DRBD device on this node.
my $backing_device = $drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}->{$volume}->{disk}->[0];
my $device_path = $drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}->{$volume}->{device}->[0]->{content};
my $device_minor = $drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}->{$volume}->{device}->[0]->{minor};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
volume => $volume,
backing_device => $backing_device,
device_path => $device_path,
device_minor => $device_minor,
}});
$anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv} = $backing_device;
$anvil->data->{server}{drbd}{'local'}{device}{$device_path}{minor} = $device_minor;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"server::drbd::local::device::${device_path}::lv" => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv},
"server::drbd::local::device::${device_path}::minor" => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{minor},
}});
# Map the resource name to the local drbd device path.
$anvil->data->{resource}{$resource}{volume}{$volume}{lv} = $backing_device;
$anvil->data->{resource}{$resource}{volume}{$volume}{path} = $device_path;
$anvil->data->{device_path}{$device_path}{resource} = $resource;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"resource::${resource}::volume::${volume}::path" => $anvil->data->{resource}{$resource}{volume}{$volume}{path},
"resource::${resource}::volume::${volume}::lv" => $anvil->data->{resource}{$resource}{volume}{$volume}{lv},
"device_path::${device_path}::resource" => $anvil->data->{device_path}{$device_path}{resource},
}});
}
}
# Pair the volumes to their backing LVs.
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0373", variables => { device_path => $device_path }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "server::drbd::local::device::${device_path}::lv" => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv} }});
if (not $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv})
{
# The backing LV doesn't exist.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 0, priority => "err", key => "log_0374", variables => { device_path => $device_path }});
$anvil->nice_exit({exit_code => 5});
}
elsif (not -e $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv})
{
# The backing LV doesn't exist.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 0, priority => "err", key => "log_0375", variables => {
device_path => $device_path,
lv => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv},
}});
$anvil->nice_exit({exit_code => 5});
}
else
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0376", variables => {
device_path => $device_path,
lv => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv},
}});
}
}
# If we're in a stop operation, we're done.
if ($anvil->data->{switches}{stop})
{
return(0);
}
# Now read in the status of the drbd devices
$return_code = undef;
(my $status_json, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdsetup}." status --json"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
status_json => $status_json,
return_code => $return_code,
json_length => length($status_json),
}});
if ($return_code)
{
# Something went wrong.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0377", variables => {
return_code => $return_code,
status_json => $status_json,
}});
$anvil->nice_exit({exit_code => 1});
}
# If DRBD is not up, the returned JSON output will not actually exist.
if (($status_json =~ /No currently configured DRBD found/si) or (not check_drbd_status($anvil, $status_json)))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0378"});
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
{
my $resource = $anvil->data->{device_path}{$device_path}{resource};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
device_path => $device_path,
resource => length($resource),
}});
if ((not exists $anvil->data->{drbd}{started}{$resource}) or (not $anvil->data->{drbd}{started}{$resource}))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0379", variables => {
resource => $resource,
device_path => $device_path,
}});
}
else
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0407", variables => {
resource => $resource,
device_path => $device_path,
}});
next;
}
manage_drbd_resource($anvil, "up", $resource);
$anvil->data->{drbd}{started}{$resource} = 1;
}
# Give them a few seconds to start.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0381"});
sleep 3;
# Check DRBD setup again
$return_code = undef;
$status_json = undef;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0385"});
($status_json, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdsetup}." status --json"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
status_json => $status_json,
return_code => $return_code,
}});
if ($return_code)
{
# Something went wrong.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0382", variables => {
return_code => $return_code,
status_json => $status_json,
}});
$anvil->nice_exit({exit_code => 1});
}
# If DRBD is still not up, we're done.
if (($status_json =~ /No currently configured DRBD found/si) or (length($status_json) < 5))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0383"});
$anvil->nice_exit({exit_code => 1});
}
}
# Process the JSON data. If any disks are not seen, they won't be set to 'ok', which we'll catch next.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status_json => $status_json }});
check_drbd_status($anvil, $status_json);
### NOTE: The checks below might no longer be needed.
=cut
# Make sure I saw all disks.
my $check_again = 0;
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
device_path => $device_path,
"server::disks::${device_path}" => $anvil->data->{server}{disks}{$device_path},
}});
if ($anvil->data->{server}{disks}{$device_path} eq "check")
{
# Failed to see it, see if we can bring it up.
$check_again = 1;
my $resource = $anvil->data->{device_path}{$device_path}{resource};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0384", variables => {
resource => $resource,
device_path => $device_path,
}});
#manage_drbd_resource($anvil, "up", $resource);
(my $drbdadm_output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." up $resource"});
if ($return_code)
{
# Something went wrong.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0380", variables => {
resource => $resource,
return_code => $return_code,
drbdadm_output => $drbdadm_output,
}});
$anvil->nice_exit({exit_code => 1});
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { check_again => $check_again }});
if ($check_again)
{
# Give the resource a few seconds to start.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0381"});
sleep 3;
# Check again.
$return_code = undef;
$status_json = undef;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0385"});
($status_json, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdsetup}." status --json"});
if ($return_code)
{
# Something went wrong.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0382", variables => {
return_code => $return_code,
status_json => $status_json,
}});
$anvil->nice_exit({exit_code => 1});
}
# Check again.
check_drbd_status($anvil, $status_json);
}
}
# Do I need to check again?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {check_again => $check_again }});
if ($check_again)
{
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
{
if ($anvil->data->{server}{disks}{$device_path} eq "check")
{
# Failed.
my $resource = $anvil->data->{device_path}{$device_path}{resource};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0386", variables => {
resource => $resource,
device_path => $device_path,
}});
$anvil->nice_exit({exit_code => 1});
}
}
}
=cut
### TODO: Finish this, whatever this was going to be...
# If I am about to push a server off, we need to make sure the peer is UpToDate
if ($anvil->data->{switches}{migrate_to})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0387"});
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
{
}
}
return(0);
}
# This processes the JSON returned by 'drbdsetup status --json' and checks that the DRBD resources backing
# this server's disks are in a usable state for the requested action.
# 
# Parameters;
# - $anvil       - The Anvil! object used here for logging, the data hash, system calls and exiting.
# - $status_json - The raw JSON string to decode. It must decode to an array of resource hashes.
# 
# For every resource in the JSON, the local node ID is stored in 'resource::<resource>::local_node_id'. For
# every volume of that resource whose device path is in 'server::disks::<path>' with the value 'check';
# - On 'start' or 'migrate_from', all local devices must be 'UpToDate' (or 'SyncSource').
# - On 'start', no connected peer may be 'Primary'.
# - On 'migrate_to', the connection to the migration target is recorded in 'resource::<resource>::target_name'
#   and 'resource::<resource>::target_node_id', and all of the target's peer devices must be 'UpToDate' (or
#   'SyncSource').
# - If all checks pass, 'server::disks::<path>' is set to 'ok'.
# 
# Finally, on 'start', any resource whose local role is 'Secondary' is promoted with 'drbdadm primary'.
# 
# Any failed check calls 'nice_exit' with exit code '1'.
# 
# Returns '1' if at least one disk marked 'check' was matched to a DRBD volume, '0' otherwise.
sub check_drbd_status
{
	my ($anvil, $status_json) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status_json => $status_json }});
	
	# 'decode' dies on malformed input, so trap it and exit cleanly instead of crashing the agent. We 
	# also insist on an array reference, as that is what the loop below dereferences.
	my $json         = JSON->new->allow_nonref;
	my $drbd_status  = eval { $json->decode($status_json) };
	my $decode_error = $@;
	if (($decode_error) or (ref($drbd_status) ne "ARRAY"))
	{
		$decode_error = "The decoded data is not a list of resources." if not $decode_error;
		chomp $decode_error;
		my $error =  "[ Error ] - There was a problem parsing the output of 'drbdsetup status --json'. The error was:\n";
		   $error .= "===========================================================\n";
		   $error .= $decode_error."\n";
		   $error .= "===========================================================\n";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
		$anvil->nice_exit({exit_code => 1});
	}
	
	my $resource_found = 0;
	foreach my $resource_ref (@{$drbd_status})
	{
		my $resource = $resource_ref->{name};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
		
		# Record my node ID for this resource
		$anvil->data->{resource}{$resource}{local_node_id} = $resource_ref->{'node-id'};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "resource::${resource}::local_node_id" => $anvil->data->{resource}{$resource}{local_node_id} }});
		
		foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{resource}{$resource}{volume}})
		{
			my $device_path    = $anvil->data->{resource}{$resource}{volume}{$volume}{path};
			my $logical_volume = $anvil->data->{resource}{$resource}{volume}{$volume}{lv};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:volume'         => $volume,
				's2:device_path'    => $device_path,
				's3:logical_volume' => $logical_volume,
			}});
			if ((exists $anvil->data->{server}{disks}{$device_path}) && ($anvil->data->{server}{disks}{$device_path} eq "check"))
			{
				### This disk is in use by this server, check it.
				$resource_found = 1;
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0388", variables => { device_path => $device_path }});
				
				# We can't run the server here until our device(s) are UpToDate or SyncSource.
				if (($anvil->data->{switches}{start}) or ($anvil->data->{switches}{migrate_from}))
				{
					foreach my $device_ref (@{$resource_ref->{devices}})
					{
						# Are we UpToDate (or SyncSource)?
						if ((lc($device_ref->{'disk-state'}) ne "uptodate") && (lc($device_ref->{'disk-state'}) ne "syncsource"))
						{
							# We can't run here.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0389", variables => { 
								resource   => $resource,
								volume     => $device_ref->{volume},
								disk_state => $device_ref->{'disk-state'},
							}});
							$anvil->nice_exit({exit_code => 1});
						}
						else
						{
							# We're good.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0390", variables => { 
								resource   => $resource,
								volume     => $device_ref->{volume},
								disk_state => $device_ref->{'disk-state'},
							}});
						}
					}
				}
				
				# If we're booting a server, we need to be sure that *no* peer is Primary.
				foreach my $connection_ref (@{$resource_ref->{connections}})
				{
					# If we're not connected, skip.
					my $connection_state = $connection_ref->{'connection-state'};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { connection_state => $connection_state }});
					next if lc($connection_state) ne "connected";
					
					# Is the peer's role Primary? We only abort on this when starting the
					# server; during a migration, the peer is allowed to be Primary.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0391", variables => { name => $connection_ref->{name} }});
					if ((lc($connection_ref->{'peer-role'}) eq "primary") && ($anvil->data->{switches}{start}))
					{
						# Don't boot here
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0392", variables => { 
							resource => $resource,
							name     => $connection_ref->{name}
						}});
						$anvil->nice_exit({exit_code => 1});
					}
					
					# If we're migrating to the peer, make sure the target disk state is UpToDate
					# or SyncSource.
					if ($anvil->data->{switches}{migrate_to})
					{
						# Is this connection to our migration target? Compare short host 
						# names (everything before the first dot).
						my $peer_short_name  =  $connection_ref->{name};
						   $peer_short_name  =~ s/\..*$//;
						my $migration_target =  $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target};
						   $migration_target =~ s/\..*$//;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							peer_short_name  => $peer_short_name,
							migration_target => $migration_target,
						}});
						if ($peer_short_name ne $migration_target)
						{
							# Ignore this, it isn't our target
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0394", variables => { peer_short_name => $peer_short_name }});
							next;
						}
						
						# We will need the node ID to enable dual-primary.
						$anvil->data->{resource}{$resource}{target_name}    = $connection_ref->{name};
						$anvil->data->{resource}{$resource}{target_node_id} = $connection_ref->{'peer-node-id'};
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							"resource::${resource}::target_name"    => $anvil->data->{resource}{$resource}{target_name},
							"resource::${resource}::target_node_id" => $anvil->data->{resource}{$resource}{target_node_id},
						}});
						
						# If we're still alive, we want to ensure all volumes are UpToDate.
						foreach my $volume_ref (@{$connection_ref->{peer_devices}})
						{
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
								volume     => $volume_ref->{volume},
								disk_state => $volume_ref->{'peer-disk-state'},
							}});
							if ((lc($volume_ref->{'peer-disk-state'}) ne "uptodate") && (lc($volume_ref->{'peer-disk-state'}) ne "syncsource"))
							{
								$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0395", variables => { 
									resource => $resource,
									name     => $connection_ref->{name}
								}});
								$anvil->nice_exit({exit_code => 1});
							}
						}
					}
				}
				
				# If we're here, it's OK.
				$anvil->data->{server}{disks}{$device_path} = "ok";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "server::disks::${device_path}" => $anvil->data->{server}{disks}{$device_path} }});
			}
			else
			{
				# Ignoring, not used.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0396", variables => { device_path => $device_path }});
			}
		}
		
		# If we're still alive and we're booting a server, make sure the local resource is Primary
		my $role = $resource_ref->{role};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { role => $role }});
		if (($anvil->data->{switches}{start}) && (lc($role) eq "secondary"))
		{
			# Go primary.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0410", variables => { 
				resource => $resource,
				role     => $role,
			}});
			my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." primary ".$resource});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output,
				return_code => $return_code,
			}});
			if ($return_code)
			{
				# Something went wrong. This is fatal, so log it as an error like the other 
				# failure paths do.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0411", variables => { 
					resource    => $resource,
					return_code => $return_code,
					output      => $output,
				}});
				$anvil->nice_exit({exit_code => 1});
			}
		}
	}
	
	return($resource_found);
}
# This walks every file listed under 'server::optical' (the media in the server's optical drives) and 
# confirms that each one exists and can be read on this machine.
# 
# A missing file causes an exit with OCF_ERR_INSTALLED (5), an unreadable file causes an exit with 
# OCF_ERR_PERM (4). Returns '0' when every file passed.
sub validate_storage_optical
{
	my ($anvil) = @_;
	
	foreach my $iso_file (sort keys %{$anvil->data->{server}{optical}})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0397", variables => { file => $iso_file }});
		
		# Work out which problem, if any, this file has. An exit code of '0' means it is usable.
		my ($problem_key, $exit_code) = ("", 0);
		if (not -e $iso_file)
		{
			# It doesn't exist; OCF_ERR_INSTALLED (5).
			($problem_key, $exit_code) = ("log_0398", 5);
		}
		elsif (not -r $iso_file)
		{
			# It exists, but we can't read it; OCF_ERR_PERM (4).
			($problem_key, $exit_code) = ("log_0399", 4);
		}
		
		if ($exit_code)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => $problem_key, variables => { file => $iso_file }});
			$anvil->nice_exit({exit_code => $exit_code});
			next;
		}
		
		# We're OK.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0400", variables => { file => $iso_file }});
	}
	
	return(0);
}
# This pulls the emulator path out of the parsed server definition and confirms that it is present and 
# executable on this machine.
# 
# A missing emulator causes an exit with OCF_ERR_INSTALLED (5), a non-executable one causes an exit with 
# OCF_ERR_PERM (4). Returns '0' when the emulator is usable.
sub validate_emulator
{
	my ($anvil) = @_;
	
	# The definition was parsed with arrays forced, so the emulator is the first entry of the first 
	# 'devices' element.
	my $emulator_path = $anvil->data->{server}{definition_xml}->{devices}->[0]->{emulator}->[0];
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { emulator => $emulator_path }});
	
	unless (-e $emulator_path)
	{
		# Not found on disk; OCF_ERR_INSTALLED (5).
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0401", variables => { 
			emulator        => $emulator_path,
			definition_file => $anvil->data->{server}{definition_file},
		}});
		$anvil->nice_exit({exit_code => 5});
	}
	
	unless (-x $emulator_path)
	{
		# Found, but not executable by us; OCF_ERR_PERM (4).
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0402", variables => { emulator => $emulator_path }});
		$anvil->nice_exit({exit_code => 4});
	}
	
	return(0);
}
# This makes sure the server name in the definition file matches the name we were asked to manage 
# ('OCF_RESKEY_name').
# 
# On a mismatch, an error is logged and we exit with code '1'. Returns '0' when the names match.
sub validate_name
{
	my ($anvil) = @_;
	
	# The definition was parsed with arrays forced, so the name is the first element of the 'name' array.
	my $server          = $anvil->data->{environment}{OCF_RESKEY_name};
	my $definition_name = $anvil->data->{server}{definition_xml}->{name}->[0];
	if ($server ne $definition_name)
	{
		# Log the name string itself, not the array reference that holds it.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0403", variables => { 
			server => $server,
			name   => $definition_name,
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	return(0);
}
# This checks that there is enough RAM available on this machine to run the server.
# 
# The RAM the server needs is read from the first 'memory' element of the parsed definition and converted
# to bytes. The RAM available is read from the 'available' column of the 'Mem:' line of 'free --bytes'.
# 
# If the server needs more RAM than is available, an error is logged and we exit with code '1'. Returns '0'
# when there is enough RAM.
sub validate_ram
{
	my ($anvil) = @_;
	
	# How much RAM does the server need?
	my $server_ram_value = $anvil->data->{server}{definition_xml}->{memory}->[0]->{content};
	my $server_ram_units = $anvil->data->{server}{definition_xml}->{memory}->[0]->{unit};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server_ram_value => $server_ram_value,
		server_ram_units => $server_ram_units,
	}});
	
	# Convert to bytes
	my $server_ram_bytes = $anvil->Convert->human_readable_to_bytes({size => $server_ram_value, type => $server_ram_units, base2 => 1 });
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server_ram_bytes => $server_ram_bytes." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}).")",
	}});
	
	# How much RAM do we have available? If the 'Mem:' line is never seen, this stays '0' and the check 
	# below fails.
	my $available = 0;
	my ($free_output, $free_rc) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{free}." --bytes"});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { free_rc => $free_rc }});
	foreach my $line (split/\n/, $free_output)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		if ($line =~ /Mem:\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)$/)
		{
			my $total  = $1;
			my $used   = $2;
			my $free   = $3;
			my $shared = $4;
			my $cache  = $5;
			$available = $6;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				total     => $total." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $total}).")",
				used      => $used." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $used}).")",
				free      => $free." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $free}).")",
				shared    => $shared." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $shared}).")",
				cache     => $cache." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $cache}).")",
				available => $available." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $available}).")",
			}});
		}
	}
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server_ram_bytes => $anvil->Convert->add_commas({number => $server_ram_bytes})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}).")",
		available        => $anvil->Convert->add_commas({number => $available})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $available}).")",
	}});
	if ($server_ram_bytes > $available)
	{
		# Not enough free memory. Report what the server needs and what is actually available.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0404", variables => { 
			name                => $anvil->data->{environment}{OCF_RESKEY_name},
			ram                 => $anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}),
			ram_bytes           => $anvil->Convert->add_commas({number => $server_ram_bytes}),
			available_ram       => $anvil->Convert->bytes_to_human_readable({'bytes' => $available}),
			available_ram_bytes => $anvil->Convert->add_commas({number => $available}),
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	return(0);
}
### TODO: Make sure the appropriate SN ports are opened.
# This runs 'drbdadm <task> <resource>' on the peer ('server::drbd::peer::hostname') and then on the local 
# machine.
# 
# Parameters;
# - $anvil    - The Anvil! object used here for logging, the data hash and the remote and local calls.
# - $task     - The drbdadm sub-command to run.
# - $resource - The DRBD resource to run the task against.
# 
# The output and return code of both calls are logged, but not acted on. Always returns '0'.
sub manage_drbd_resource
{
	my ($anvil, $task, $resource) = @_;
	
	my $peer_hostname = $anvil->data->{server}{drbd}{peer}{hostname};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		'server::drbd::peer::hostname' => $peer_hostname,
	}});
	
	# The same command line is used on both machines.
	my $shell_call = join(" ", $anvil->data->{path}{exe}{drbdadm}, $task, $resource);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		peer_hostname => $peer_hostname,
		shell_call    => $shell_call,
	}});
	
	# Peer first...
	my ($peer_output, $peer_error, $peer_return_code) = $anvil->Remote->call({
		debug      => 2,
		shell_call => $shell_call, 
		target     => $peer_hostname,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $peer_output,
		error       => $peer_error,
		return_code => $peer_return_code,
	}});
	
	# ... then here.
	my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $local_output,
		return_code => $local_return_code,
	}});
	
	return(0);
}
# This reads the server's XML definition file and parses it into a data hash.
# 
# The file read is '<path::directories::shared::definitions>/<OCF_RESKEY_name>.xml'. On success, the parsed 
# data is stored in 'server::definition_xml' and the file's path in 'server::definition_file'.
# 
# Exits with OCF_ERR_INSTALLED (5) if the file doesn't exist, OCF_ERR_PERM (4) if it can't be read and '1' 
# if the XML can't be parsed. Returns '0' on success.
sub read_server_definition
{
	my ($anvil) = @_;
	
	my $server          = $anvil->data->{environment}{OCF_RESKEY_name};
	my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml";
	my $server_xml      = "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server          => $server,
		definition_file => $definition_file,
	}});
	
	# If the file doesn't exist, return OCF_ERR_INSTALLED (5). If the file exists but we can't read it, 
	# return OCF_ERR_PERM (4).
	if (not -e $definition_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0405", variables => { 
			definition_file => $definition_file,
			server          => $server,
		}});
		$anvil->nice_exit({exit_code => 5});
	}
	elsif (not -r $definition_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0406", variables => { 
			definition_file => $definition_file,
			server          => $server,
		}});
		$anvil->nice_exit({exit_code => 4});
	}
	
	# Still alive? Read it in.
	my $definition_xml = $anvil->Storage->read_file({file => $definition_file});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { definition_xml => $definition_xml }});
	
	# 'ForceArray' is set so that every element is an array reference, even when it occurs only once.
	my $xml = XML::Simple->new();
	eval { $server_xml = $xml->XMLin($definition_xml, KeyAttr => {}, ForceArray => 1) };
	if ($@)
	{
		# Copy the error right away; '$@' is a global that a later eval would overwrite.
		my $parse_error = $@;
		chomp $parse_error;
		my $error =  "[ Error ] - There was a problem parsing: [$definition_file]. The error was:\n";
		   $error .= "===========================================================\n";
		   $error .= $parse_error."\n";
		   $error .= "===========================================================\n";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
		$anvil->nice_exit({exit_code => 1});
	}
	
	$anvil->data->{server}{definition_xml}  = $server_xml;
	$anvil->data->{server}{definition_file} = $definition_file;
	
	return(0);
}
# This records how we were invoked, writing each value to the log at the requested level.
# 
# Parameters;
# - $anvil - The Anvil! object used here for logging and the data hash.
# - $level - The log level to record the values at.
# 
# Logged, in order, are the non-empty switches (except 'raw'), the non-empty 'environment::*' values, and 
# then every shell environment variable not already present in 'environment::*'. Always returns '0'.
sub show_environment
{
	my ($anvil, $level) = @_;
	
	# Command line switches.
	for my $switch (sort keys %{$anvil->data->{switches}})
	{
		next if (($switch eq "raw") or ($anvil->data->{switches}{$switch} eq ""));
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "switches::${switch}" => $anvil->data->{switches}{$switch} }});
	}
	
	# Environment variables we track.
	for my $variable (sort keys %{$anvil->data->{environment}})
	{
		my $value = $anvil->data->{environment}{$variable};
		next if $value eq "";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "environment::${variable}" => $value }});
	}
	
	# Whatever else is in the shell's environment.
	for my $name (sort keys %ENV)
	{
		if (not exists $anvil->data->{environment}{$name})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "ENV::${name}" => $ENV{$name} }});
		}
	}
	
	return(0);
}
# This is a placeholder for the usage message. For now, nothing is printed; it simply exits with code '0'.
sub show_usage
{
	my $anvil = shift;
	
	### TODO: How to use this...
	
	$anvil->nice_exit({exit_code => 0});
}
# This prints the resource agent's metadata XML to STDOUT and then exits with code '0'.
# 
# The metadata declares a single required, unique parameter ('name', the server's name) and the actions 
# this agent supports along with their timeouts.
sub show_metadata
{
	my ($anvil) = @_;
	
	# This is a pretty simple agent, by design. We only take a server name for now.
	print '<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ocf:alteeve:server" version="0.1">
<version>1.0</version>
<longdesc lang="en">
This resource agent manages KVM+qemu virtual servers on an Anvil! m3 Intelligent Availability(tm) system.
It manages underlying components like DRBD 9 storage resources, bridge connections and so forth.
</longdesc>
<shortdesc lang="en">Anvil! m3 server resource agent</shortdesc>
<parameters>
<parameter name="name" unique="1" required="1">
<longdesc lang="en">
This is the name of the server as reported by virsh.
</longdesc>
<shortdesc lang="en">Server name</shortdesc>
<content type="string"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="60" />
<action name="monitor" timeout="10" />
<action name="notify" timeout="20" />
<action name="migrate_to" timeout="600" />
<action name="migrate_from" timeout="600" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
';
	
	$anvil->nice_exit({exit_code => 0});
}
# This gathers command line switches and stores them in 'switches::<foo>'.
# 
# The arguments in @ARGV are handled as follows;
# - '--foo=bar' or '-foo=bar' sets 'switches::foo' to 'bar'.
# - '--foo bar' sets 'switches::foo' to 'bar'.
# - '--foo' with no value, or a bare word with no preceding switch, sets 'switches::<name>' to '#!SET!#'.
# - Everything after a lone '--' is joined with single spaces and stored in 'switches::raw'.
# 
# NOTE(review): After '--foo=bar', a following bare word replaces the value of 'switches::foo', because the 
#               switch name is still remembered. Confirm whether that is intended before relying on it.
# 
# Always returns '0'.
sub get_switches
{
	my ($anvil) = @_;
	
	my $last_argument = "";
	$anvil->data->{switches}{raw} = "";
	foreach my $argument (@ARGV)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { argument => $argument }});
		if ($last_argument eq "raw")
		{
			# Don't process anything.
			$anvil->data->{switches}{raw} .= " ".$argument;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::raw" => $anvil->data->{switches}{raw} }});
		}
		elsif ($argument =~ /^-/)
		{
			# If the argument is just '--', append everything after it to 'raw'.
			if ($argument eq "--")
			{
				$last_argument                = "raw";
				$anvil->data->{switches}{raw} = "";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::raw" => $anvil->data->{switches}{raw} }});
			}
			else
			{
				# Strip the leading dash(es) to get the switch name.
				($last_argument) = ($argument =~ /^-{1,2}(.*)/)[0];
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { last_argument => $last_argument }});
				if ($last_argument =~ /=/)
				{
					# Break up the variable/value.
					($last_argument, my $value) = (split /=/, $last_argument, 2);
					$anvil->data->{switches}{$last_argument} = $value;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::${last_argument}" => $anvil->data->{switches}{$last_argument} }});
				}
				else
				{
					# No value (yet), so mark it as set. A following bare word will replace this.
					$anvil->data->{switches}{$last_argument} = "#!SET!#";
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::${last_argument}" => $anvil->data->{switches}{$last_argument} }});
				}
			}
		}
		else
		{
			if ($last_argument)
			{
				# This is the value for the previous switch.
				$anvil->data->{switches}{$last_argument} = $argument;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::${last_argument}" => $anvil->data->{switches}{$last_argument} }});
				
				$last_argument = "";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { last_argument => $last_argument }});
			}
			else
			{
				# Got a value without an argument. That's OK. Note that the log label uses the 
				# argument itself, as '$last_argument' is empty here.
				$anvil->data->{switches}{$argument} = "#!SET!#";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::${argument}" => $anvil->data->{switches}{$argument} }});
			}
		}
	}
	
	# Clean up the initial space added to 'raw'.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::raw:" => $anvil->data->{switches}{raw} }});
	if ($anvil->data->{switches}{raw})
	{
		$anvil->data->{switches}{raw} =~ s/^ //;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::raw:" => $anvil->data->{switches}{raw} }});
	}
	
	return(0);
}