|
|
#!/usr/bin/perl |
|
|
# |
|
|
# This is the resource agent used to manage servers on the Anvil! Intelligent Availability platform. |
|
|
# |
|
|
# License: GNU General Public License (GPL) v2+ |
|
|
# (c) 1997-2018 - Alteeve's Niche! Inc. |
|
|
# |
|
|
# WARNING: This is a pretty purpose-specific resource agent. No effort was made to test this on an rgmanager |
|
|
# cluster or on any configuration outside how the Anvil! m3 uses it. If you plan to adapt it to |
|
|
# another purpose, let us know and we'll try to help. |
|
|
# |
|
|
# Based on: https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc |
|
|
# |
|
|
# Error types from pacemaker's perspective; |
|
|
# |
|
|
# - Soft Error - Unless specifically configured otherwise, pacemaker will attempt to recover a resource |
|
|
# in-place - usually by restarting the resource on the same node. |
|
|
# - Hard Error - Unless specifically configured otherwise, pacemaker will attempt to recover a resource |
|
|
# which failed with this error by restarting the resource on a different node. |
|
|
# - Fatal Error - This is a cluster-wide error, it would make no sense to recover such a resource on a |
|
|
# different node, let alone in-place. When a resource fails with this error, Pacemaker will |
|
|
# attempt to shut down the resource, and wait for administrator intervention. |
|
|
# |
|
|
# Exit codes; |
|
|
# 0 - OCF_SUCCESS |
|
|
# - The action completed successfully. This is the expected return code for any successful start, stop, |
|
|
# migrate_to, meta_data, help, and usage action. |
|
|
# - For monitor, however, a modified convention applies: |
|
|
# - If the server is running we return, OCF_SUCCESS. If not running and gracefully stopped or migrated |
|
|
# off, return OCF_NOT_RUNNING. |
|
|
# |
|
|
# 1 - OCF_ERR_GENERIC |
|
|
# - The action returned a generic error. This is used only when none of the more specific error codes, |
|
|
# defined below, accurately describes the problem. |
|
|
# - Pacemaker interprets this exit code as a soft error. |
|
|
# |
|
|
# 2 - OCF_ERR_ARGS |
|
|
# - The resource’s configuration is not valid on this machine. This can happen if the server fails to boot
|
|
# because of a missing bridge, for example. |
|
|
# |
|
|
# 3 - OCF_ERR_UNIMPLEMENTED |
|
|
# - The resource agent was instructed to execute an action that we do not implement. |
|
|
# - Not all resource agent actions are mandatory. We don't implement 'promote', 'demote' or 'notify'. We do
#   implement 'migrate_to' and 'migrate_from'. If we're misconfigured as a master/slave resource, for
#   example, then we will alert the user about this misconfiguration by returning OCF_ERR_UNIMPLEMENTED.
|
|
# |
|
|
# 4 - OCF_ERR_PERM |
|
|
# - The action failed due to insufficient permissions. This may be due to a node not being able to open a |
|
|
# definition file or resource config. |
|
|
# - Pacemaker interprets this exit code as a hard error. |
|
|
# |
|
|
# 5 - OCF_ERR_INSTALLED |
|
|
# - The action failed because a required component is missing on the node where the action was executed. |
|
|
# This may be due to a required binary not being executable, or the DRBD resource config file not
|
|
# existing. |
|
|
# - Pacemaker interprets this exit code as a hard error. |
|
|
# |
|
|
# 6 - OCF_ERR_CONFIGURED |
|
|
# - The action failed because the user misconfigured the resource in pacemaker. For example, the user may |
|
|
# have configured an alphanumeric string for a parameter that really should be an integer. |
|
|
# - Pacemaker interprets this exit code as a fatal error. |
|
|
# |
|
|
# 7 - OCF_NOT_RUNNING |
|
|
# - The resource was found not to be running. This is an exit code that may be returned by the monitor |
|
|
# action exclusively. Note that this implies that the resource has either gracefully shut down, or has |
|
|
# never been started. |
|
|
# |
|
|
# 8 - OCF_RUNNING_MASTER |
|
|
# 9 - OCF_FAILED_MASTER |
|
|
# - These OCF exit codes are not used here. |
|
|
# |
|
|
|
|
|
# NOTE: This agent relies on Anvil::Tools (loaded below) for logging, system calls and shared data.
|
|
use strict; |
|
|
use warnings; |
|
|
use Anvil::Tools; |
|
|
use XML::Simple; |
|
|
use JSON; |
|
|
use Math::BigInt; |
|
|
use Data::Dumper; |
|
|
|
|
|
# Turn off output buffering so that anything we print is shown immediately, even while we're waiting on a
# slow call to complete.
$| = 1;

# Work out the name of this program and the directory it was called from by splitting '$0' on its last
# '/'. If '$0' has no '/' in it (for example, when called as 'perl server'), the name is '$0' itself and
# the directory is '.', so that neither variable is ever left undefined.
my ($running_directory, $THIS_FILE) = ($0 =~ /^(.*)\/(.*)$/);
if (not defined $THIS_FILE)
{
	$THIS_FILE         = $0;
	$running_directory = ".";
}

# If we were called relative to the current directory ('.' or './...'), swap the leading '.' for the
# working directory. The look-ahead keeps us from mangling '..' (which would otherwise become '<PWD>.').
if (($running_directory =~ /^\.(?=\/|$)/) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}
|
|
|
|
|
# Create the Anvil::Tools handle used by everything below, and set our default logging behaviour.
# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
#       in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
|
|
|
|
|
### Read or Set the environment variables
# Every variable listed here is copied from the environment into 'environment::<name>'. A variable that
# is not set is stored as an empty string. The trailing comments show example values.
foreach my $variable (
	"OCF_RESKEY_name",                      # The name of the server we're managing; srv01-c7
	"OCF_RESKEY_CRM_meta_on_node",          # Our node name; el8-a01n01.digimer.ca
	"OCF_RESKEY_CRM_meta_on_node_uuid",     # This says "UUID", but it's the node ID; 1
	"OCF_RESKEY_CRM_meta_timeout",          # The timeout for the called action, in milliseconds; 20000
	"PCMK_debug",                           # If this is set, we'll bump our log level as well; 0
	# These are other variables that are set, but we don't currently care about them
	"OCF_EXIT_REASON_PREFIX",               # ocf-exit-reason:
	"OCF_RA_VERSION_MAJOR",                 # 1
	"OCF_RA_VERSION_MINOR",                 # 0
	"OCF_RESKEY_crm_feature_set",           # 3.0.12
	"OCF_RESOURCE_INSTANCE",                # srv01-c7
	"OCF_RESOURCE_PROVIDER",                # alteeve
	"OCF_RESOURCE_TYPE",                    # server
	"OCF_ROOT",                             # /usr/lib/ocf
	# These are set during a migration
	"OCF_RESKEY_CRM_meta_migrate_source",   # el8-a01n01.digimer.ca
	"OCF_RESKEY_CRM_meta_migrate_target",   # el8-a01n02.digimer.ca
	"OCF_RESKEY_CRM_meta_record_pending",   # true
)
{
	$anvil->data->{environment}{$variable} = $ENV{$variable} // "";
}
|
|
|
|
|
# When pacemaker runs in debug mode, raise our own log level to match.
$anvil->Log->level({set => 3}) if $anvil->data->{environment}{PCMK_debug};

# Parse the command line switches.
$anvil->Get->switches();

# Leave a note in the logs, except when we were only asked for our metadata.
unless (($anvil->data->{switches}{metadaata}) or ($anvil->data->{switches}{'meta-data'}))
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0298"});
}
|
|
|
|
|
|
|
|
### TEST: to be removed later
# Each '--testN' switch overrides a set of environment values so that an action can be exercised by hand.
#  test1 - Migrate 'test_server' from node 1 to node 2.
#  test2 - Migrate 'test_server' from node 2 to node 1.
#  test3 - Boot 'test_server' locally.
#  test4 - Shut down 'test_server' locally.
# The tests are applied in order, so when several are given the later ones win.
foreach my $test (
	[test1 => {
		OCF_RESKEY_name                    => "test_server",
		OCF_RESKEY_CRM_meta_timeout        => 20000,
		OCF_RESKEY_CRM_meta_on_node        => "el8-a01n01.digimer.ca",
		OCF_RESKEY_CRM_meta_migrate_source => "el8-a01n01.digimer.ca",
		OCF_RESKEY_CRM_meta_migrate_target => "el8-a01n02.digimer.ca",
	}],
	[test2 => {
		OCF_RESKEY_name                    => "test_server",
		OCF_RESKEY_CRM_meta_timeout        => 20000,
		OCF_RESKEY_CRM_meta_on_node        => "el8-a01n02.digimer.ca",
		OCF_RESKEY_CRM_meta_migrate_source => "el8-a01n02.digimer.ca",
		OCF_RESKEY_CRM_meta_migrate_target => "el8-a01n01.digimer.ca",
	}],
	[test3 => {
		OCF_RESKEY_name             => "test_server",
		OCF_RESKEY_CRM_meta_on_node => "el8-a01n01.digimer.ca",
	}],
	[test4 => {
		OCF_RESKEY_name             => "test_server",
		OCF_RESKEY_CRM_meta_on_node => "el8-a01n01.digimer.ca",
	}],
)
{
	my ($switch, $overrides) = @{$test};
	next if not $anvil->data->{switches}{$switch};
	
	foreach my $variable (sort {$a cmp $b} keys %{$overrides})
	{
		$anvil->data->{environment}{$variable} = $overrides->{$variable};
	}
}
|
|
|
|
|
# This is for debugging. The environment is shown at log level 3 no matter which action was requested
# (the 'monitor', 'status' and 'meta-data' calls were previously tested for here, but both outcomes made
# exactly this call).
show_environment($anvil, 3);
|
|
|
|
|
### What are we being asked to do?
# start        - Starts the resource.
# stop         - Shuts down the resource.
# monitor      - (status aliases here) Queries the resource for its state.
# meta-data    - Dumps the resource agent metadata.
# promote      - Turns a resource into the Master role (Master/Slave resources only). Not implemented.
# demote       - Turns a resource into the Slave role (Master/Slave resources only). Not implemented.
# migrate_to   - Live migration; handed to migrate_server().
# migrate_from - Live migration; handed to migrate_server().
# validate-all - Validates a resource's configuration.
# help         - (usage maps here) Displays a usage message when the resource agent is invoked from the
#                command line, rather than by the cluster manager.
# notify       - Inform resource about changes in state of other clones. Not implemented.
{
	# Each entry pairs the switch(es) that select an action with the code that performs it. The list is
	# walked in order and the first entry with a switch that is set wins, so the order here matters.
	my @actions = (
		[["start"], sub {
			# Start the server
			start_server($anvil);
		}],
		[["stop"], sub {
			# Stop the server
			stop_server($anvil);
		}],
		[["monitor", "status"], sub {
			# Report the status of the server.
			server_status($anvil);
		}],
		[["metadaata", "meta-data"], sub {
			show_metadata($anvil);
		}],
		[["promote"], sub {
			# We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3)
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0299", variables => { server => $anvil->data->{environment}{OCF_RESKEY_name} }});
			$anvil->nice_exit({exit_code => 3});
		}],
		[["demote"], sub {
			# We don't support this, so we return OCF_ERR_UNIMPLEMENTED (3)
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0300", variables => { server => $anvil->data->{environment}{OCF_RESKEY_name} }});
			$anvil->nice_exit({exit_code => 3});
		}],
		[["migrate_to", "migrate_from"], sub {
			# Both halves of a migration are handled by the same function.
			migrate_server($anvil);
		}],
		[["validate-all"], sub {
			# Validate our local config and setup.
			validate_all($anvil);
			$anvil->nice_exit({exit_code => 0});
		}],
		[["help", "usage"], sub {
			# Show the usage information
			show_usage($anvil);
			$anvil->nice_exit({exit_code => 0});
		}],
		[["notify"], sub {
			# We don't implement this
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0301"});
			$anvil->nice_exit({exit_code => 3});
		}],
	);
	
	# Find the first action whose switch was passed in.
	my $handler = undef;
	ACTION: foreach my $action (@actions)
	{
		my ($switches, $code) = @{$action};
		foreach my $switch (@{$switches})
		{
			next if not $anvil->data->{switches}{$switch};
			$handler = $code;
			last ACTION;
		}
	}
	
	if ($handler)
	{
		$handler->();
	}
	else
	{
		# We were called in some unexpected way. Log an error, show the environment and exit.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0302"});
		show_environment($anvil, 0);
		$anvil->nice_exit({exit_code => 1});
	}
}

# If we hit here, something very wrong happened.
$anvil->nice_exit({exit_code => 255});
|
|
|
|
|
|
|
|
############################################################################################################# |
|
|
# Functions # |
|
|
############################################################################################################# |
|
|
|
|
|
=cut |
|
|
|
|
|
STATES |
|
|
|
|
|
The State field lists what state each domain is currently in. A domain can be in one of the following |
|
|
possible states: |
|
|
|
|
|
running - The domain is currently running on a CPU |
|
|
idle - The domain is idle, and not running or runnable. This can be caused because the domain is |
|
|
waiting on IO (a traditional wait state) or has gone to sleep because there was nothing else |
|
|
for it to do. |
|
|
paused - The domain has been paused, usually occurring through the administrator running virsh suspend. |
|
|
When in a paused state the domain will still consume allocated resources like memory, but will |
|
|
not be eligible for scheduling by the hypervisor. |
|
|
in shutdown - The domain is in the process of shutting down, i.e. the guest operating system has been |
|
|
notified and should be in the process of stopping its operations gracefully. |
|
|
shut off - The domain is not running. Usually this indicates the domain has been shut down completely, or |
|
|
has not been started. |
|
|
crashed - The domain has crashed, which is always a violent ending. Usually this state can only occur if |
|
|
the domain has been configured not to restart on crash. |
|
|
pmsuspended - The domain has been suspended by guest power management, e.g. entered into s3 state. |
|
|
=cut |
|
|
|
|
|
# This boots the server if possible.
# 
# Parameters;
# - $anvil - The Anvil::Tools handle. The name of the server to boot is read from
#            'environment::OCF_RESKEY_name'.
# 
# Every path through this function ends in a call to '$anvil->nice_exit()', using these exit codes;
# - 0 - The server is running after 'virsh create', or it was already listed by 'virsh list' in a state
#       other than 'shut off' (in which case nothing was done).
# - 1 - 'virsh create' returned cleanly, but the server was not in the 'virsh list' output afterwards.
# - 6 - A 'virsh' call returned a non-zero return code, or the server was listed after booting in a state
#       other than 'running'.
sub start_server
{
	my ($anvil) = @_;
	
	# Start procedure;
	# 1.   Read the XML definition file and find the backing storage and bridges. Soft error if read fails.
	# 2.   Make sure the name matches.
	# 3.   Make sure we have enough free RAM.
	# 4.   Make sure the emulator exists (can be an issue after migrating from an different gen Anvil!).
	# 5.1. Make sure optical drives with mounted data have the disk present. Soft error if not.
	# 5.2. Find any backing DRBD devices
	# 6.   For each DRBD device;
	# 6.1. Make sure the backing LV is ACTIVE. Soft error if not.
	# 6.2. Check if the drbd resource is up. If not, up it.
	# 6.3. Make sure the backing disk is UpToDate. Soft error if not.
	# 6.4. Make sure the backing device is 'Connected' or 'Connecting'. Call a connect if not.
	# 7.   Make sure all bridges exist and soft error if not.
	# 8.   Start the server.
	# NOTE(review): Steps 1 ~ 7 are not done in this function; presumably validate_all() handles them.
	
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0303", variables => { server => $server }});
	
	# If the server is already here, we'll do nothing else.
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0304", variables => { return_code => $return_code, output => $output }});
		$anvil->nice_exit({exit_code => 6});
	}
	foreach my $line (split/\n/, $output)
	{
		# Trim the line and collapse runs of white space so that it reads '<id> <name> <state>'.
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;
		
		# The server name is quoted so that characters like '.' in it are matched literally.
		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			my $state = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				server  => $server,
				'state' => $state,
			}});
			
			# Make sure the server is shut off, if it is listed at all. Any other state is 
			# unexpected and needs to be sorted by a human.
			if ($state ne "shut off")
			{
				# Abort
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0306", variables => { server => $server, 'state' => $state }});
				$anvil->nice_exit({exit_code => 0});
			}
			last;
		}
	}
	
	# We need to boot, validate everything.
	validate_all($anvil);
	
	# If we're still alive, we're ready to boot.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0305", variables => { server => $server }});
	
	my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { definition_file => $definition_file }});
	
	$return_code = undef;
	$output      = undef;
	($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." create $definition_file"});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0307", variables => { 
			server      => $server, 
			return_code => $return_code, 
			output      => $output, 
		}});
		$anvil->nice_exit({exit_code => 6});
	}
	
	# Verify that it started. We give it a moment to show up in the list first.
	sleep 2;
	$return_code = undef;
	$output      = undef;
	($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
	if ($return_code)
	{
		# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
		# also start the server on another node, because we don't know the state of it here.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0308", variables => { 
			server      => $server, 
			return_code => $return_code, 
			output      => $output, 
		}});
		$anvil->nice_exit({exit_code => 6});
	}
	foreach my $line (split/\n/, $output)
	{
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;
		
		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			my $state = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				server  => $server,
				'state' => $state,
			}});
			
			if ($state eq "running")
			{
				# Success!
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0309", variables => { server => $server }});
				$anvil->nice_exit({exit_code => 0});
			}
			else
			{
				# The server is listed, but not in the state we expect right after booting it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0310", variables => { server => $server, 'state' => $state }});
				$anvil->nice_exit({exit_code => 6});
			}
			
			last;
		}
	}
	
	# If we're still alive, then we didn't see the server in the list of running servers, which is really weird.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0311", variables => { server => $server }});
	$anvil->nice_exit({exit_code => 1});
}
|
|
|
|
|
# This shuts down the server if possible.
# 
# Parameters;
# - $anvil - The Anvil::Tools handle. The name of the server to stop is read from
#            'environment::OCF_RESKEY_name'.
# 
# Every path through this function ends in a call to '$anvil->nice_exit()', using these exit codes;
# - 0 - The server is not listed by 'virsh list' (before or after the shutdown request), is 'shut off', or
#       was destroyed after being found 'idle' or 'crashed'.
# - 1 - A 'virsh' call returned a non-zero return code.
# - 6 - The server was found in a state that is not handled here.
# 
# NOTE: There is no timeout in here. After the shutdown is requested, we poll 'virsh list' every five
#       seconds until the server is gone, and rely on pacemaker to kill us if that takes too long.
sub stop_server
{
	my ($anvil) = @_;
	
	# Stopping the server is simply a question of "is the server running?" and, if so, stop it. Once 
	# stopped, we stop the DRBD resource on both nodes.
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
	if ($return_code)
	{
		# Looks like virsh isn't running.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0312", variables => { return_code => $return_code, output => $output }});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# 'shutdown' is cleared if the server is already on its way down, 'found' is set if virsh lists the
	# server at all.
	my $shutdown = 1;
	my $found    = 0;
	foreach my $line (split/\n/, $output)
	{
		# Trim the line and collapse runs of white space so that it reads '<id> <name> <state>'.
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;
		
		# The server name is quoted so that characters like '.' in it are matched literally.
		if ($line =~ /^(\d+) \Q$server\E (.*)$/)
		{
			my $state = $2;
			   $found = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				server  => $server,
				'state' => $state,
			}});
			
			if ($state eq "running")
			{
				# The server is running, shut it down.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0313", variables => { server => $server }});
			}
			elsif ($state eq "paused")
			{
				# The server is paused. Resume it, wait a few, then proceed with the shutdown.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0314", variables => { server => $server }});
				my ($resume_output, $resume_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." resume $server"});
				if ($resume_return_code)
				{
					# The resume failed, so we can't go on to shut the server down.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0315", variables => { 
						server      => $server, 
						return_code => $resume_return_code, 
						output      => $resume_output, 
					}});
					$anvil->nice_exit({exit_code => 1});
				}
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0316"});
				sleep 3;
			}
			elsif ($state eq "pmsuspended")
			{
				# The server was suspended by guest power management. Wake it up, give it time
				# to come back, then proceed with the shutdown.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0317", variables => { server => $server }});
				my ($wake_output, $wake_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." dompmwakeup $server"});
				if ($wake_return_code)
				{
					# The wake-up failed, so we can't go on to shut the server down.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0318", variables => { 
						server      => $server, 
						return_code => $wake_return_code, 
						output      => $wake_output, 
					}});
					$anvil->nice_exit({exit_code => 1});
				}
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0319"});
				sleep 30;
			}
			elsif ($state eq "in shutdown")
			{
				# The server is already shutting down, so we only need to wait for it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0320", variables => { server => $server }});
				$shutdown = 0;
			}
			elsif ($state eq "shut off")
			{
				# The server is already off, there is nothing to do.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0321", variables => { server => $server }});
				$anvil->nice_exit({exit_code => 0});
			}
			elsif (($state eq "idle") or ($state eq "crashed"))
			{
				# The server needs to be destroyed.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0322", variables => { 
					server  => $server, 
					'state' => $state,
				}});
				my ($destroy_output, $destroy_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." destroy $server"});
				if ($destroy_return_code)
				{
					# The destroy failed.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0323", variables => { 
						server      => $server, 
						return_code => $destroy_return_code, 
						output      => $destroy_output, 
					}});
					$anvil->nice_exit({exit_code => 1});
				}
				
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }});
				$anvil->nice_exit({exit_code => 0});
			}
			else
			{
				# This is a state we don't know how to handle.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0325", variables => { 
					server  => $server, 
					'state' => $state,
				}});
				$anvil->nice_exit({exit_code => 6});
			}
			
			last;
		}
	}
	
	# If we didn't see it, it's off and undefined.
	if (not $found)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0326", variables => { server => $server }});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# If we're alive, it is time to stop the server
	if ($shutdown)
	{
		my ($shutdown_output, $shutdown_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." shutdown $server"});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0327", variables => { server => $server }});
		if ($shutdown_return_code)
		{
			# The shutdown request failed.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0328", variables => { 
				server      => $server, 
				return_code => $shutdown_return_code, 
				output      => $shutdown_output, 
			}});
			$anvil->nice_exit({exit_code => 1});
		}
	}
	
	# Now loop until we see the server either vanish from virsh or enter "shut off" state. We wait 
	# forever and let pacemaker kill us if we time out.
	while (1)
	{
		my $still_listed = 0;
		my ($list_output, $list_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
		if ($list_return_code)
		{
			# Looks like virsh isn't running.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0312", variables => { 
				server      => $server, 
				return_code => $list_return_code, 
				output      => $list_output, 
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		foreach my $line (split/\n/, $list_output)
		{
			$line =~ s/^\s+//;
			$line =~ s/\s+$//;
			$line =~ s/\s+/ /g;
			
			if ($line =~ /^(\d+) \Q$server\E (.*)$/)
			{
				my $state        = $2;
				   $still_listed = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
					server  => $server,
					'state' => $state,
				}});
				
				if ($state eq "shut off")
				{
					# We're down.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }});
					$anvil->nice_exit({exit_code => 0});
				}
				
				last;
			}
		}
		
		# If we didn't find the server, it's off and undefined now.
		if (not $still_listed)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0329", variables => { server => $server }});
			
			# Stop DRBD resources now. We don't worry if it actually stops or not (let ScanCore 
			# handle that). We only care that the server has stopped.
			read_server_definition($anvil);
			validate_storage($anvil);
			foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}})
			{
				my $resource = $anvil->data->{device_path}{$device_path}{resource};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
					device_path => $device_path,
					resource    => $resource,
				}});
				
				# Several disks can sit on the same resource, so 'drbd::stopped::<resource>'
				# is used to make sure each resource is only taken down once.
				if ((not exists $anvil->data->{drbd}{stopped}{$resource}) or (not $anvil->data->{drbd}{stopped}{$resource}))
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0408", variables => { 
						resource    => $resource,
						device_path => $device_path,
					}});
					manage_drbd_resource($anvil, "down", $resource);
					$anvil->data->{drbd}{stopped}{$resource} = 1;
				}
				else
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0409", variables => { 
						resource    => $resource,
						device_path => $device_path,
					}});
				}
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		# The server is still listed, so wait a bit and check again.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0330", variables => { server => $server }});
		sleep 5;
	}
	
	# The loop above only ends by exiting, so this is never reached. It is kept as a safety net.
	$anvil->nice_exit({exit_code => 0});
}
|
|
|
|
|
# This checks the status of the server.
#
# The server named in 'environment::OCF_RESKEY_name' is looked up in the output of 'virsh list'. This sub
# does not return a value to the caller; every path ends in a call to '$anvil->nice_exit()' with one of the
# following exit codes;
#   0 - The server is listed as 'running', 'paused', 'pmsuspended' or 'in shutdown'.
#   7 - The server is listed as 'shut off', or is not listed at all (OCF_NOT_RUNNING).
#   1 - The server is 'idle', 'crashed' or in an unrecognized state, or 'virsh list' kept returning a
#       non-zero return code until the wait time ran out (OCF_ERR_GENERIC).
#
# Parameters;
#   $anvil - The shared handle used for logging, system calls and data storage.
sub server_status
{
	my ($anvil) = @_;

	# If the named server is running, return OCF_SUCCESS (0), otherwise OCF_NOT_RUNNING (7). If the
	# server is failed, return OCF_ERR_GENERIC (1).
	my $state  = "";
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};

	### NOTE: When pacemaker is first starting, virsh won't be up right away. So if we get a return code
	###       of '1', we'll try again up to 50% of 'environment::OCF_RESKEY_CRM_meta_timeout'.
	if (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout})
	{
		# Set a sane default of 20 seconds (the value is stored in milliseconds).
		$anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} = 20000;
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "warn", key => "log_0331", variables => { logout => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} }});
	}
	my $return_code  = undef;
	my $output       = "";
	my $current_time = time;
	# Convert the timeout from milliseconds to seconds and halve it. This must be a plain division; a
	# compound assignment here would overwrite the stored timeout with the value in seconds.
	my $timeout      = $current_time + int(($anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} / 1000) / 2);
	my $waiting      = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		current_time => $current_time,
		timeout      => $timeout,
	}});

	while($waiting)
	{
		# Make the call
		($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
		if (not $return_code)
		{
			# The call worked, we can stop waiting.
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { waiting => $waiting }});
		}
		elsif (time > $timeout)
		{
			# We've waited long enough.
			$waiting = 0;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "warn", key => "log_0332", variables => { return_code => $return_code }});
		}
		else
		{
			# Not ready yet, wait a bit and try again.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0333", variables => { return_code => $return_code }});
			sleep 2;
		}
	}

	# If I got a non-zero return code, something went wrong with the virsh call.
	if ($return_code)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0334", variables => { return_code => $return_code }});
		if ($output)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0335", variables => {
				command => $anvil->data->{path}{exe}{virsh},
				output  => $output,
			}});
		}
		$anvil->nice_exit({exit_code => 1});
	}

	# If we're still alive, process the output
	foreach my $line (split/\n/, $output)
	{
		# Normalize the white space so that each line reads '<id> <name> <state>'.
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;

		# The server name is quoted so that characters like '.' or '+' in it are matched literally.
		if ($line =~ /^\d+ \Q$server\E (.*)$/)
		{
			$state = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				server  => $server,
				'state' => $state,
			}});
			last;
		}
	}

	# If there is a state, see what the state is.
	if ($state)
	{
		# What is the state?
		# (See the comment below the 'FUNCTIONS' divider above the first function for a full list of states.)
		if (($state eq "running") or ($state eq "paused") or ($state eq "pmsuspended") or ($state eq "in shutdown"))
		{
			# The server is here, report OCF_SUCCESS.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0336", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 0});
		}
		elsif ($state eq "shut off")
		{
			# Cleanly off, report OCF_NOT_RUNNING (the literal 7, not a regex capture variable).
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0337", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 7});
		}
		elsif (($state eq "idle") or ($state eq "crashed"))
		{
			# The server has failed, report OCF_ERR_GENERIC.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0338", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		else
		{
			# This is a state we don't recognize, so treat it as a failure.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0339", variables => {
				server  => $server,
				'state' => $state,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
	}
	else
	{
		# Not running. Exit with OCF_NOT_RUNNING
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0340", variables => { server => $server }});
		$anvil->nice_exit({exit_code => 7});
	}

	# Every branch above already exits, so this is only a safety net.
	$anvil->nice_exit({exit_code => 0});
}
|
|
|
|
|
# Migrate the server
#
# This handles both migration actions, selected by the 'switches::migrate_to' and 'switches::migrate_from'
# values;
#   migrate_to   - Confirms the server is running here, validates its storage and then pushes it to
#                  'environment::OCF_RESKEY_CRM_meta_migrate_target'.
#   migrate_from - If we're the target, checks whether the server is already running here and exits with
#                  '0' if so. Otherwise everything is validated and the server is pulled from
#                  'environment::OCF_RESKEY_CRM_meta_migrate_source'.
#
# In both cases, dual-primary is enabled on the DRBD resources that have a 'target_node_id' before the
# migration, and 'drbdadm adjust all' is called afterwards regardless of the outcome.
#
# This sub does not return a value to the caller; every path ends in a call to '$anvil->nice_exit()'. The
# exit code is '0' on success, '1' on failure and '6' if the local 'virsh list' call fails before a pull.
#
# Parameters;
#   $anvil - The shared handle used for logging, system calls and data storage.
sub migrate_server
{
	my ($anvil) = @_;

	# If we were given 'migrate_to', we need to make sure the storage is UpToDate on the peer for all
	# backing resources. We can't check the target's bridges, but the migration will fail if one is
	# missing.
	# If we're given 'migrate_from', we're pulling the server towards us, so we can check both bridges
	# and storage.
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	my $source = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source};
	my $target = $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		server => $server,
		source => $source,
		target => $target,
	}});

	# This matches the server's line in the (white space normalized) output of 'virsh list', capturing
	# the state. The server name is quoted so that characters like '.' or '+' are matched literally.
	my $server_regex = qr/^\d+ \Q$server\E (.*)$/;

	# The actual migration command will involve enabling dual primary, then beginning the migration. The
	# virsh call will depend on if we're pushing or pulling. Once the migration completes, regardless of
	# success or failure, dual primary will be disabled again.
	my $migration_command = "";
	my $verify_command    = "";

	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		'switches::migrate_to'   => $anvil->data->{switches}{migrate_to},
		'switches::migrate_from' => $anvil->data->{switches}{migrate_from},
	}});
	if ($anvil->data->{switches}{migrate_to})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0341", variables => {
			server => $server,
			target => $target,
		}});

		# Is the server even here?
		my $found = 0;
		my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
		if ($return_code)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0342", variables => {
				server      => $server,
				return_code => $return_code,
				output      => $output,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		foreach my $line (split/\n/, $output)
		{
			$line =~ s/^\s+//;
			$line =~ s/\s+$//;
			$line =~ s/\s+/ /g;

			if ($line =~ $server_regex)
			{
				my $state = $1;
				   $found = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					server  => $server,
					'state' => $state,
					found   => $found,
				}});

				# We can only migrate if it is running.
				if (lc($state) ne "running")
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0343", variables => {
						server  => $server,
						'state' => $state,
					}});
					$anvil->nice_exit({exit_code => 1});
				}
			}
		}
		if (not $found)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0344", variables => { server => $server }});
			$anvil->nice_exit({exit_code => 1});
		}

		read_server_definition($anvil);
		validate_storage($anvil);

		# If we're alive, craft the migration command.
		$migration_command = $anvil->data->{path}{exe}{virsh}." migrate --undefinesource --tunnelled --p2p --live ".$server." qemu+ssh://".$target."/system";
		$verify_command    = $anvil->data->{path}{exe}{virsh}." list";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			migration_command => $migration_command,
			verify_command    => $verify_command,
		}});
	}
	elsif ($anvil->data->{switches}{migrate_from})
	{
		# This can be called after a migration has already happened. If that is the case here, the
		# target will be us. Just make sure the server is running and, if so, return '0'.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			"environment::OCF_RESKEY_CRM_meta_on_node" => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node},
			target                                     => $target,
		}});
		if ($anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} eq $target)
		{
			# Yup. All we want to do is make sure it is running here.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0345", variables => { server => $server }});
			my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
			if ($return_code)
			{
				# This really shouldn't happen... The migration to here should have failed.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0346", variables => {
					server      => $server,
					return_code => $return_code,
					output      => $output,
				}});
				$anvil->nice_exit({exit_code => 1});
			}
			foreach my $line (split/\n/, $output)
			{
				$line =~ s/^\s+//;
				$line =~ s/\s+$//;
				$line =~ s/\s+/ /g;

				if ($line =~ $server_regex)
				{
					my $state = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						server  => $server,
						'state' => $state,
					}});
					if ($state eq "running")
					{
						# Success!
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0347", variables => { server => $server }});
						$anvil->nice_exit({exit_code => 0});
					}
				}
			}

			# If we're still alive, we'll proceed as if we're pulling the server to us, and maybe
			# that will work.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0348", variables => { server => $server }});
		}
		else
		{
			# If we're being re-invoked after a previous successful migration, then the server
			# might already be here. Check before we proceed.
			my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list"});
			if ($return_code)
			{
				# If this fails, we want to exit with OCF_ERR_CONFIGURED (6) so that pacemaker doesn't try to
				# also start the server on another node, because we don't know the state of it here.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "log_0304", variables => { return_code => $return_code, output => $output }});
				$anvil->nice_exit({exit_code => 6});
			}
			foreach my $line (split/\n/, $output)
			{
				$line =~ s/^\s+//;
				$line =~ s/\s+$//;
				$line =~ s/\s+/ /g;

				if ($line =~ $server_regex)
				{
					my $state = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						server  => $server,
						'state' => $state,
					}});

					# Make sure the server is shut down, if it is listed at all. Any other state is
					# unexpected and needs to be sorted by a human. Note that virsh reports a
					# stopped server as 'shut off'.
					if ($state ne "shut off")
					{
						# Abort
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0412", variables => { server => $server, 'state' => $state }});
						$anvil->nice_exit({exit_code => 0});
					}
					last;
				}
			}
		}

		# Validate everything, as if we were about to boot
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0349", variables => {
			server => $server,
			target => $target,
		}});
		validate_all($anvil);

		# If we're alive, craft the migration command.
		$migration_command = $anvil->data->{path}{exe}{virsh}." -c qemu+ssh://root\@".$source."/system migrate --undefinesource --tunnelled --p2p --live ".$server." qemu+ssh://".$target."/system";
		$verify_command    = $anvil->data->{path}{exe}{virsh}." -c qemu+ssh://root\@".$source."/system list";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			migration_command => $migration_command,
			verify_command    => $verify_command,
		}});
	}

	# Enable dual-primary. If this fails, we will disable (or try to) and then abort.
	my $migrate = 1;
	foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{resource}})
	{
		# Only resources with a known target node ID are touched, and we stop trying once one fails.
		next if not defined $anvil->data->{resource}{$resource}{target_node_id};
		next if not $migrate;
		my $shell_call = $anvil->data->{path}{exe}{drbdsetup}." net-options ".$resource." ".$anvil->data->{resource}{$resource}{target_node_id}." --allow-two-primaries=yes";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});

		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0350", variables => {
			resource       => $resource,
			target_name    => $anvil->data->{resource}{$resource}{target_name},
			target_node_id => $anvil->data->{resource}{$resource}{target_node_id},
		}});
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		if ($return_code)
		{
			# Something went wrong. This uses its own message key; 'log_0346' belongs to the
			# 'virsh list' failure in the 'migrate_from' branch and takes different variables.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0351", variables => {
				resource       => $resource,
				target_name    => $anvil->data->{resource}{$resource}{target_name},
				target_node_id => $anvil->data->{resource}{$resource}{target_node_id},
				return_code    => $return_code,
				output         => $output,
			}});

			# Disable migration (and any further attempts to enable dual-primary).
			$migrate = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrate => $migrate }});
		}
	}

	my $migrated = 0;
	if ($migrate)
	{
		# Call the migration.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0352", variables => {
			server => $server,
			target => $target,
		}});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migration_command => $migration_command }});
		my ($output, $return_code) = $anvil->System->call({shell_call => $migration_command});
		if ($return_code)
		{
			# Something went wrong.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0353", variables => {
				server      => $server,
				target      => $target,
				return_code => $return_code,
				output      => $output,
			}});
		}
		else
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0354"});

			$migrated = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrated => $migrated }});
		}
	}

	# Switch off dual-primary. This is done whether or not the migration worked.
	my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust all";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});

	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0355"});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	if ($return_code)
	{
		# Something went wrong.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0356", variables => {
			return_code => $return_code,
			output      => $output,
		}});
		$anvil->nice_exit({exit_code => 1});
	}

	# Did something go wrong during the dual-primary enable or the actual migration call?
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		migrate  => $migrate,
		migrated => $migrated,
	}});
	if ((not $migrate) or (not $migrated))
	{
		# Exit
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0357"});
		$anvil->nice_exit({exit_code => 1});
	}

	# Last, check where the server is now using the verify command crafted above.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { verify_command => $verify_command }});

	$return_code = undef;
	$output      = undef;
	($output, $return_code) = $anvil->System->call({shell_call => $verify_command});
	if ($return_code)
	{
		# We can't tell what state the server is in, so this is treated as a failure.
		# NOTE(review): The comment that used to be here spoke of exiting with OCF_ERR_CONFIGURED (6),
		#               but the code has always exited with '1'. Confirm which one is intended.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0358", variables => {
			target      => $target,
			return_code => $return_code,
			output      => $output,
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	foreach my $line (split/\n/, $output)
	{
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		$line =~ s/\s+/ /g;

		if ($line =~ $server_regex)
		{
			my $state = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				server  => $server,
				'state' => $state,
			}});
			if ($state eq "running")
			{
				# Success!
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0359", variables => {
					server => $server,
					target => $target,
				}});
				$anvil->nice_exit({exit_code => 0});
			}
		}
	}

	# If we made it here, we succeeded.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0360"});
	$anvil->nice_exit({exit_code => 0});
}
|
|
|
|
|
# This runs all of the validation steps in order; reading the server's definition, then checking the
# server's name, the emulator, the available RAM (only when starting or migrating the server to here), the
# bridges and finally the storage. A log entry is recorded before the first step and after each step.
#
# Parameters;
#   $anvil - The shared handle used for logging and data storage.
#
# Returns '0'.
sub validate_all
{
	my ($anvil) = @_;

	# The RAM check only matters when the server is about to run on this node.
	my $about_to_boot = (($anvil->data->{switches}{start}) or ($anvil->data->{switches}{migrate_from}));

	$anvil->Log->entry({
		source  => $THIS_FILE,
		line    => __LINE__,
		'print' => 1,
		level   => 2,
		key     => "log_0361",
	});

	# Step 1; Load the definition file, if it can be found and read.
	read_server_definition($anvil);
	$anvil->Log->entry({
		source  => $THIS_FILE,
		line    => __LINE__,
		'print' => 1,
		level   => 2,
		key     => "log_0362",
	});

	# Step 2; The name inside the definition has to match the server's name.
	validate_name($anvil);
	$anvil->Log->entry({
		source  => $THIS_FILE,
		line    => __LINE__,
		'print' => 1,
		level   => 2,
		key     => "log_0363",
	});

	# Step 3; The emulator the server asks for has to be the one we have.
	validate_emulator($anvil);
	$anvil->Log->entry({
		source  => $THIS_FILE,
		line    => __LINE__,
		'print' => 1,
		level   => 2,
		key     => "log_0364",
	});

	# Step 4; Is there enough RAM? (Skipped unless we're about to boot the server.)
	if ($about_to_boot)
	{
		validate_ram($anvil);
		$anvil->Log->entry({
			source  => $THIS_FILE,
			line    => __LINE__,
			'print' => 1,
			level   => 2,
			key     => "log_0365",
		});
	}

	# Step 5; Check the bridges.
	validate_bridges($anvil);
	$anvil->Log->entry({
		source  => $THIS_FILE,
		line    => __LINE__,
		'print' => 1,
		level   => 2,
		key     => "log_0366",
	});

	# Step 6; Check the storage, both disks and optical media.
	validate_storage($anvil);
	$anvil->Log->entry({
		source  => $THIS_FILE,
		line    => __LINE__,
		'print' => 1,
		level   => 2,
		key     => "log_0367",
	});

	return(0);
}
|
|
|
|
|
# This ensures that the bridges the server connects to exist on this node.
#
# The bridges named in the server's definition are recorded under 'server::bridges::<bridge>'. The bridges
# on this node are read from the JSON output of 'bridge -j link show' and recorded under
# 'local::bridge::<bridge>' (with the interface MTUs under 'bridge::<bridge>::interface::<name>::mtu').
#
# If the output of the 'bridge' call can't be parsed, the problem is logged and we exit with '1'. If the
# server needs a bridge that isn't on this node, we exit with '5'.
#
# Parameters;
#   $anvil - The shared handle used for logging, system calls and data storage.
#
# Returns '0' if all bridges were found.
sub validate_bridges
{
	my ($anvil) = @_;

	# Find the bridges that the server's network interfaces connect to.
	foreach my $device_ref (@{$anvil->data->{server}{definition_xml}->{devices}})
	{
		foreach my $interface_ref (@{$device_ref->{interface}})
		{
			foreach my $source_ref (@{$interface_ref->{source}})
			{
				my $bridge = $source_ref->{bridge};
				$anvil->data->{server}{bridges}{$bridge} = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "server::bridges::${bridge}" => $anvil->data->{server}{bridges}{$bridge} }});
			}
		}
	}

	# Get a list of available bridges. We pick up interfaces and MTU data as well, though there really
	# isn't any use for it at this time.
	my $shell_call = $anvil->data->{path}{exe}{bridge}." -j link show";
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});

	# The decode call dies on empty or malformed input, so it is wrapped in an eval to let us log the
	# problem and exit cleanly instead of crashing. Anything other than an array is also unusable.
	my $json        = JSON->new->allow_nonref;
	my $bridge_data = undef;
	my $decoded     = eval { $bridge_data = $json->decode($output); 1; };
	my $problem     = $@;
	if ((not $decoded) or (ref($bridge_data) ne "ARRAY"))
	{
		$problem = "The decoded data was not an array." if $decoded;
		chomp $problem;
		my $say_output      = defined $output      ? $output      : "";
		my $say_return_code = defined $return_code ? $return_code : "";
		my $error =  "[ Error ] - There was a problem parsing the output of: [".$shell_call."]. The return code was: [".$say_return_code."] and the output was: [".$say_output."]. The error was:\n";
		   $error .= "===========================================================\n";
		   $error .= $problem."\n";
		   $error .= "===========================================================\n";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
		$anvil->nice_exit({exit_code => 1});
	}
	foreach my $hash_ref (@{$bridge_data})
	{
		# Each entry is an interface; 'master' is the bridge it is connected to.
		my $bridge    = $hash_ref->{master};
		my $interface = $hash_ref->{ifname};
		my $mtu       = $hash_ref->{mtu};
		$anvil->data->{bridge}{$bridge}{interface}{$interface}{mtu} = $mtu;
		$anvil->data->{'local'}{bridge}{$bridge}                    = 1;
	}

	# Verify bridges now
	foreach my $bridge (sort {$a cmp $b} keys %{$anvil->data->{server}{bridges}})
	{
		if ($anvil->data->{'local'}{bridge}{$bridge})
		{
			# The bridge is here.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0368", variables => { bridge => $bridge }});
		}
		else
		{
			# Missing bridge.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0369", variables => { bridge => $bridge }});
			$anvil->nice_exit({exit_code => 5});
		}
	}

	return(0);
}
|
|
|
|
|
# This looks up the disks and optical media connected to this server.
#
# Each 'disk' type device's source path is recorded as 'server::disks::<path> = "check"' and each 'cdrom'
# type device's source file is recorded as 'server::optical::<file> = 1'. The optical media is then
# validated, unless the server is being stopped or migrated off of this node, and the DRBD devices are
# always validated.
#
# Parameters;
#   $anvil - The shared handle used for logging and data storage.
#
# Returns '0'.
sub validate_storage
{
	my ($anvil) = @_;

	# Walk through the devices in the definition, looking at each disk entry.
	foreach my $device (@{$anvil->data->{server}{definition_xml}->{devices}})
	{
		foreach my $disk (@{$device->{disk}})
		{
			my $type = $disk->{device};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { type => $type }});

			# We only care about disks and optical drives.
			next if (($type ne "disk") and ($type ne "cdrom"));

			foreach my $source_entry (@{$disk->{source}})
			{
				if ($type eq "disk")
				{
					# A disk; Mark its device path as needing to be checked.
					my $path = $source_entry->{dev};
					$anvil->data->{server}{disks}{$path} = "check";
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						"server::disks::${path}" => $anvil->data->{server}{disks}{$path},
					}});
				}
				else
				{
					# An optical drive; Record the file in it.
					my $iso = $source_entry->{file};
					$anvil->data->{server}{optical}{$iso} = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
						"server::optical::${iso}" => $anvil->data->{server}{optical}{$iso},
					}});
				}
			}
		}
	}

	# The optical media doesn't need to be checked if the server is leaving this node or stopping.
	my $leaving = (($anvil->data->{switches}{migrate_to}) or ($anvil->data->{switches}{stop}));
	if (not $leaving)
	{
		validate_storage_optical($anvil);
	}

	# The DRBD devices are always checked.
	validate_storage_drbd($anvil);

	return(0);
}
|
|
|
|
|
# This makes sure that the needed backing DRBD devices are on this node. If so, and if they are not up, they |
|
|
# will be brought up. If that fails, it errors out. |
|
|
sub validate_storage_drbd |
|
|
{ |
|
|
my ($anvil) = @_; |
|
|
|
|
|
# Read in the DRBD configuration XML. |
|
|
my ($drbd_body, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." dump-xml"}); |
|
|
if ($return_code) |
|
|
{ |
|
|
# Something went wrong. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0370", variables => { |
|
|
return_code => $return_code, |
|
|
drbd_body => $drbd_body, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
|
|
|
# Parse the XML |
|
|
my $drbd_xml = ""; |
|
|
my $xml = XML::Simple->new(); |
|
|
eval { $drbd_xml = $xml->XMLin($drbd_body, KeyAttr => ["name", "vnr"], ForceArray => 1) }; |
|
|
if ($@) |
|
|
{ |
|
|
chomp $@; |
|
|
my $error = "[ Error ] - The was a problem parsing: [$drbd_body]. The error was:\n"; |
|
|
$error .= "===========================================================\n"; |
|
|
$error .= $@."\n"; |
|
|
$error .= "===========================================================\n"; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
|
|
|
foreach my $resource (sort {$a cmp $b} keys %{$drbd_xml->{resource}}) |
|
|
{ |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }}); |
|
|
|
|
|
# Figure out who I am and who my peer is, ignoring DR host(s). |
|
|
my $peer = ""; |
|
|
my $local = ""; |
|
|
foreach my $connection_ref (@{$drbd_xml->{resource}->{$resource}->{connection}}) |
|
|
{ |
|
|
my $protocol = $connection_ref->{section}->{net}->{option}->{protocol}->{value}; |
|
|
my $fencing = $connection_ref->{section}->{net}->{option}->{fencing}->{value}; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
|
protocol => $protocol, |
|
|
fencing => $fencing, |
|
|
}}); |
|
|
|
|
|
# If this isn't set to 'resource-and-stonith', it's a DR connection and we'll ignore |
|
|
# it. |
|
|
next if $fencing ne "resource-and-stonith"; |
|
|
|
|
|
# Look at the hosts |
|
|
foreach my $host (sort {$a cmp $b} keys %{$connection_ref->{host}}) |
|
|
{ |
|
|
my $address = $connection_ref->{host}->{$host}->{address}->[0]->{content}; |
|
|
my $port = $connection_ref->{host}->{$host}->{address}->[0]->{port}; |
|
|
my $short_hostname = $host; |
|
|
$short_hostname =~ s/\..*$//; |
|
|
my $local_hostname = $anvil->_hostname; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
host => $host, |
|
|
short_hostname => $short_hostname, |
|
|
address => $address, |
|
|
port => $port, |
|
|
local_hostname => $local_hostname, |
|
|
}}); |
|
|
|
|
|
# Is this me or the peer? |
|
|
if (($local_hostname eq $short_hostname) or ($local_hostname =~ /^$short_hostname\./)) |
|
|
{ |
|
|
# This is us. |
|
|
$local = $host; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'local' => $local }}); |
|
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0371", variables => { |
|
|
resource => $resource, |
|
|
address => $address, |
|
|
port => $port, |
|
|
}}); |
|
|
$anvil->data->{server}{drbd}{'local'}{hostname} = $host, |
|
|
$anvil->data->{server}{drbd}{'local'}{short_hostname} = $short_hostname, |
|
|
$anvil->data->{server}{drbd}{'local'}{address} = $address, |
|
|
$anvil->data->{server}{drbd}{'local'}{port} = $port, |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
"server::drbd::local::hostname" => $anvil->data->{server}{drbd}{'local'}{hostname}, |
|
|
"server::drbd::local::short_hostname" => $anvil->data->{server}{drbd}{'local'}{short_hostname}, |
|
|
"server::drbd::local::address" => $anvil->data->{server}{drbd}{'local'}{address}, |
|
|
"server::drbd::local::port" => $anvil->data->{server}{drbd}{'local'}{port}, |
|
|
}}); |
|
|
|
|
|
# Record my node name for this resource (to be paired with the node |
|
|
# ID when migrating) |
|
|
$anvil->data->{resource}{$resource}{local_node_name} = $host; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "resource::${resource}::local_node_name" => $anvil->data->{resource}{$resource}{local_node_name} }}); |
|
|
} |
|
|
else |
|
|
{ |
|
|
# This is our peer |
|
|
$peer = $host; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer => $peer }}); |
|
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0372", variables => { |
|
|
resource => $resource, |
|
|
address => $address, |
|
|
port => $port, |
|
|
}}); |
|
|
$anvil->data->{server}{drbd}{peer}{hostname} = $host, |
|
|
$anvil->data->{server}{drbd}{peer}{short_hostname} = $short_hostname, |
|
|
$anvil->data->{server}{drbd}{peer}{address} = $address, |
|
|
$anvil->data->{server}{drbd}{peer}{port} = $port, |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
"server::drbd::peer::hostname" => $anvil->data->{server}{drbd}{peer}{hostname}, |
|
|
"server::drbd::peer::short_hostname" => $anvil->data->{server}{drbd}{peer}{short_hostname}, |
|
|
"server::drbd::peer::address" => $anvil->data->{server}{drbd}{peer}{address}, |
|
|
"server::drbd::peer::port" => $anvil->data->{server}{drbd}{peer}{port}, |
|
|
}}); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
'local' => $local, |
|
|
peer => $peer, |
|
|
}}); |
|
|
foreach my $volume (sort {$a cmp $b} keys %{$drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}}) |
|
|
{ |
|
|
# The backing device is the logical volume underpinning this DRBD device on this node. |
|
|
my $backing_device = $drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}->{$volume}->{disk}->[0]; |
|
|
my $device_path = $drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}->{$volume}->{device}->[0]->{content}; |
|
|
my $device_minor = $drbd_xml->{resource}->{$resource}->{host}->{$local}->{volume}->{$volume}->{device}->[0]->{minor}; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
|
volume => $volume, |
|
|
backing_device => $backing_device, |
|
|
device_path => $device_path, |
|
|
device_minor => $device_minor, |
|
|
}}); |
|
|
|
|
|
$anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv} = $backing_device; |
|
|
$anvil->data->{server}{drbd}{'local'}{device}{$device_path}{minor} = $device_minor; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
|
"server::drbd::local::device::${device_path}::lv" => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv}, |
|
|
"server::drbd::local::device::${device_path}::minor" => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{minor}, |
|
|
}}); |
|
|
|
|
|
# Map the resource name to the local drbd device path. |
|
|
$anvil->data->{resource}{$resource}{volume}{$volume}{lv} = $backing_device; |
|
|
$anvil->data->{resource}{$resource}{volume}{$volume}{path} = $device_path; |
|
|
$anvil->data->{device_path}{$device_path}{resource} = $resource; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
"resource::${resource}::volume::${volume}::path" => $anvil->data->{resource}{$resource}{volume}{$volume}{path}, |
|
|
"resource::${resource}::volume::${volume}::lv" => $anvil->data->{resource}{$resource}{volume}{$volume}{lv}, |
|
|
"device_path::${device_path}::resource" => $anvil->data->{device_path}{$device_path}{resource}, |
|
|
}}); |
|
|
} |
|
|
} |
|
|
|
|
|
# Pair the volumes to their backing LVs. |
|
|
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}}) |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0373", variables => { device_path => $device_path }}); |
|
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "server::drbd::local::device::${device_path}::lv" => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv} }}); |
|
|
if (not $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv}) |
|
|
{ |
|
|
# The backing LV doesn't exist. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 0, priority => "err", key => "log_0374", variables => { device_path => $device_path }}); |
|
|
$anvil->nice_exit({exit_code => 5}); |
|
|
} |
|
|
elsif (not -e $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv}) |
|
|
{ |
|
|
# The backing LV doesn't exist. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 0, priority => "err", key => "log_0375", variables => { |
|
|
device_path => $device_path, |
|
|
lv => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv}, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 5}); |
|
|
} |
|
|
else |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0376", variables => { |
|
|
device_path => $device_path, |
|
|
lv => $anvil->data->{server}{drbd}{'local'}{device}{$device_path}{lv}, |
|
|
}}); |
|
|
} |
|
|
} |
|
|
|
|
|
# If we're in a stop operation, we're done. |
|
|
if ($anvil->data->{switches}{stop}) |
|
|
{ |
|
|
return(0); |
|
|
} |
|
|
|
|
|
# Now read in the status of the drbd devices |
|
|
$return_code = undef; |
|
|
(my $status_json, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdsetup}." status --json"}); |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
|
status_json => $status_json, |
|
|
return_code => $return_code, |
|
|
json_length => length($status_json), |
|
|
}}); |
|
|
if ($return_code) |
|
|
{ |
|
|
# Something went wrong. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0377", variables => { |
|
|
return_code => $return_code, |
|
|
status_json => $status_json, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
|
|
|
# If DRBD is not up, the returned JSON output will not actually exist. |
|
|
if (($status_json =~ /No currently configured DRBD found/si) or (not check_drbd_status($anvil, $status_json))) |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0378"}); |
|
|
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}}) |
|
|
{ |
|
|
my $resource = $anvil->data->{device_path}{$device_path}{resource}; |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
device_path => $device_path, |
|
|
resource => length($resource), |
|
|
}}); |
|
|
|
|
|
if ((not exists $anvil->data->{drbd}{started}{$resource}) or (not $anvil->data->{drbd}{started}{$resource})) |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0379", variables => { |
|
|
resource => $resource, |
|
|
device_path => $device_path, |
|
|
}}); |
|
|
} |
|
|
else |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0407", variables => { |
|
|
resource => $resource, |
|
|
device_path => $device_path, |
|
|
}}); |
|
|
next; |
|
|
} |
|
|
|
|
|
manage_drbd_resource($anvil, "up", $resource); |
|
|
$anvil->data->{drbd}{started}{$resource} = 1; |
|
|
} |
|
|
|
|
|
# Give them a few seconds to start. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0381"}); |
|
|
sleep 3; |
|
|
|
|
|
# Check DRBD setup again |
|
|
$return_code = undef; |
|
|
$status_json = undef; |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0385"}); |
|
|
($status_json, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdsetup}." status --json"}); |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
|
status_json => $status_json, |
|
|
return_code => $return_code, |
|
|
}}); |
|
|
if ($return_code) |
|
|
{ |
|
|
# Something went wrong. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0382", variables => { |
|
|
return_code => $return_code, |
|
|
status_json => $status_json, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
|
|
|
# If DRBD is still not up, we're done. |
|
|
if (($status_json =~ /No currently configured DRBD found/si) or (length($status_json) < 5)) |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0383"}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
} |
|
|
|
|
|
# Process the JSON data. If any disks are not seen, they won't be set to 'ok', which we'll catch next. |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status_json => $status_json }}); |
|
|
check_drbd_status($anvil, $status_json); |
|
|
|
|
|
### NOTE: The checks below might no longer be needed. |
|
|
=cut |
|
|
# Make sure I saw all disks. |
|
|
my $check_again = 0; |
|
|
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}}) |
|
|
{ |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
|
device_path => $device_path, |
|
|
"server::disks::${device_path}" => $anvil->data->{server}{disks}{$device_path}, |
|
|
}}); |
|
|
|
|
|
if ($anvil->data->{server}{disks}{$device_path} eq "check") |
|
|
{ |
|
|
# Failed to see it, see if we can bring it up. |
|
|
$check_again = 1; |
|
|
my $resource = $anvil->data->{device_path}{$device_path}{resource}; |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0384", variables => { |
|
|
resource => $resource, |
|
|
device_path => $device_path, |
|
|
}}); |
|
|
|
|
|
#manage_drbd_resource($anvil, "up", $resource); |
|
|
(my $drbdadm_output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." up $resource"}); |
|
|
if ($return_code) |
|
|
{ |
|
|
# Something went wrong. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0380", variables => { |
|
|
resource => $resource, |
|
|
return_code => $return_code, |
|
|
drbdadm_output => $drbdadm_output, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
} |
|
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { check_again => $check_again }}); |
|
|
if ($check_again) |
|
|
{ |
|
|
# Give the resource a few seconds to start. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0381"}); |
|
|
sleep 3; |
|
|
|
|
|
# Check again. |
|
|
$return_code = undef; |
|
|
$status_json = undef; |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0385"}); |
|
|
($status_json, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdsetup}." status --json"}); |
|
|
if ($return_code) |
|
|
{ |
|
|
# Something went wrong. |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0382", variables => { |
|
|
return_code => $return_code, |
|
|
status_json => $status_json, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
|
|
|
# Check again. |
|
|
check_drbd_status($anvil, $status_json); |
|
|
} |
|
|
} |
|
|
|
|
|
# Do I need to check again? |
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {check_again => $check_again }}); |
|
|
if ($check_again) |
|
|
{ |
|
|
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}}) |
|
|
{ |
|
|
if ($anvil->data->{server}{disks}{$device_path} eq "check") |
|
|
{ |
|
|
# Failed. |
|
|
my $resource = $anvil->data->{device_path}{$device_path}{resource}; |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0386", variables => { |
|
|
resource => $resource, |
|
|
device_path => $device_path, |
|
|
}}); |
|
|
$anvil->nice_exit({exit_code => 1}); |
|
|
} |
|
|
} |
|
|
} |
|
|
=cut |
|
|
|
|
|
### TODO: Finish this, whatever this was going to be... |
|
|
# If I am about to push a server off, we need to make sure the peer is UpToDate |
|
|
if ($anvil->data->{switches}{migrate_to}) |
|
|
{ |
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0387"}); |
|
|
foreach my $device_path (sort {$a cmp $b} keys %{$anvil->data->{server}{disks}}) |
|
|
{ |
|
|
} |
|
|
} |
|
|
|
|
|
return(0); |
|
|
} |
|
|
|
|
|
# This processes the JSON returned by 'drbdsetup status --json'.
#
# For every resource in the status, the local node ID is recorded in 'resource::<resource>::local_node_id'.
# Each volume known under 'resource::<resource>::volume' whose device path is marked "check" in
# 'server::disks::<device_path>' is then verified and, if every check passes, marked "ok";
# - On 'start' or 'migrate_from', every local device of the resource must be UpToDate (or SyncSource).
# - On 'start', no connected peer may be Primary.
# - On 'migrate_to', the migration target's name and node ID are recorded and all of the target's disks
#   must be UpToDate (or SyncSource).
# Finally, on 'start', a resource whose local role is Secondary is promoted with 'drbdadm primary'.
#
# Any failed check is logged and the agent exits with '1' (OCF_ERR_GENERIC) via nice_exit().
#
# Parameters;
# - $anvil       - The object used for logging, shared data and system calls.
# - $status_json - The raw output of 'drbdsetup status --json'.
#
# Returns '1' if at least one disk used by this server was found in the DRBD status, '0' otherwise
# (including when no status data was passed in at all).
sub check_drbd_status
{
	my ($anvil, $status_json) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status_json => $status_json }});
	
	# If DRBD is not up there may be no output at all. The JSON decoder would die on that, so treat it
	# as "no resource found" and let the caller decide what to do.
	if ((not defined $status_json) or ($status_json =~ /^\s*$/))
	{
		return(0);
	}
	
	# Decode inside an eval so malformed output leads to a logged, clean exit instead of an uncaught
	# exception. We also insist on an array reference, as that is what we iterate over below.
	my $json        = JSON->new->allow_nonref;
	my $drbd_status = undef;
	my $decoded     = eval { $drbd_status = $json->decode($status_json); 1; };
	if ((not $decoded) or (ref($drbd_status) ne "ARRAY"))
	{
		my $error = $decoded ? "The DRBD status data was not a JSON array." : $@;
		   $error = "" if not defined $error;
		chomp $error;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { 
			error       => $error,
			status_json => $status_json,
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	my $resource_found = 0;
	foreach my $resource_ref (@{$drbd_status})
	{
		my $resource = $resource_ref->{name};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
		
		# Record my node ID for this resource
		$anvil->data->{resource}{$resource}{local_node_id} = $resource_ref->{'node-id'};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "resource::${resource}::local_node_id" => $anvil->data->{resource}{$resource}{local_node_id} }});
		
		foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{resource}{$resource}{volume}})
		{
			my $device_path    = $anvil->data->{resource}{$resource}{volume}{$volume}{path};
			my $logical_volume = $anvil->data->{resource}{$resource}{volume}{$volume}{lv};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:volume'                           => $volume,
				's2:device_path'                      => $device_path,
				's3:logical_volume'                   => $logical_volume,
				"s4:server::disks::${device_path}"    => defined $anvil->data->{server}{disks}{$device_path} ? $anvil->data->{server}{disks}{$device_path} : "",
			}});
			
			if ((defined $anvil->data->{server}{disks}{$device_path}) && ($anvil->data->{server}{disks}{$device_path} eq "check"))
			{
				### This disk is in use by this server, check it.
				$resource_found = 1;
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0388", variables => { device_path => $device_path }});
				
				# We can't run the server here until our device(s) are UpToDate or SyncSource.
				if (($anvil->data->{switches}{start}) or ($anvil->data->{switches}{migrate_from}))
				{
					foreach my $device_ref (@{$resource_ref->{devices}})
					{
						# Are we UpToDate (or SyncSource)?
						if ((lc($device_ref->{'disk-state'}) ne "uptodate") && (lc($device_ref->{'disk-state'}) ne "syncsource"))
						{
							# We can't run here.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0389", variables => { 
								resource   => $resource,
								volume     => $device_ref->{volume},
								disk_state => $device_ref->{'disk-state'},
							}});
							$anvil->nice_exit({exit_code => 1});
						}
						else
						{
							# We're good.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0390", variables => { 
								resource   => $resource,
								volume     => $device_ref->{volume},
								disk_state => $device_ref->{'disk-state'},
							}});
						}
					}
				}
				
				# If we're booting a server, we need to be sure that *no* peer is Primary.
				foreach my $connection_ref (@{$resource_ref->{connections}})
				{
					# If we're not connected, skip.
					my $connection_state = $connection_ref->{'connection-state'};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { connection_state => $connection_state }});
					next if lc($connection_state) ne "connected";
					
					# Is the peer's role Primary? If so, and we're starting, we abort.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0391", variables => { name => $connection_ref->{name} }});
					if ((lc($connection_ref->{'peer-role'}) eq "primary") && ($anvil->data->{switches}{start}))
					{
						# Don't boot here
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0392", variables => { 
							resource => $resource,
							name     => $connection_ref->{name}
						}});
						$anvil->nice_exit({exit_code => 1});
					}
					
					# If we're migrating to the peer, make sure the target disk state is UpToDate
					# or SyncSource.
					if ($anvil->data->{switches}{migrate_to})
					{
						# Is this connection to our migration target? Compare short host names
						# (everything up to the first dot).
						my $peer_short_name  =  $connection_ref->{name};
						   $peer_short_name  =~ s/\..*$//;
						my $migration_target =  $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target};
						   $migration_target =  "" if not defined $migration_target;
						   $migration_target =~ s/\..*$//;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							peer_short_name  => $peer_short_name,
							migration_target => $migration_target,
						}});
						if ($peer_short_name ne $migration_target)
						{
							# Ignore this, it isn't our target
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0394", variables => { peer_short_name => $peer_short_name }});
							next;
						}
						
						# We will need the node ID to enable dual-primary.
						$anvil->data->{resource}{$resource}{target_name}    = $connection_ref->{name};
						$anvil->data->{resource}{$resource}{target_node_id} = $connection_ref->{'peer-node-id'};
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							"resource::${resource}::target_name"    => $anvil->data->{resource}{$resource}{target_name},
							"resource::${resource}::target_node_id" => $anvil->data->{resource}{$resource}{target_node_id},
						}});
						
						# If we're still alive, we want to ensure all volumes are UpToDate.
						foreach my $volume_ref (@{$connection_ref->{peer_devices}})
						{
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
								volume     => $volume_ref->{volume},
								disk_state => $volume_ref->{'peer-disk-state'},
							}});
							if ((lc($volume_ref->{'peer-disk-state'}) ne "uptodate") && (lc($volume_ref->{'peer-disk-state'}) ne "syncsource"))
							{
								$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0395", variables => { 
									resource => $resource,
									name     => $connection_ref->{name}
								}});
								$anvil->nice_exit({exit_code => 1});
							}
						}
					}
				}
				
				# If we're here, it's OK.
				$anvil->data->{server}{disks}{$device_path} = "ok";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "server::disks::${device_path}" => $anvil->data->{server}{disks}{$device_path} }});
			}
			else
			{
				# Ignoring, not used.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0396", variables => { device_path => $device_path }});
			}
		}
		
		# If we're still alive and we're booting a server, make sure the local resource is Primary
		my $role = $resource_ref->{role};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { role => $role }});
		if (($anvil->data->{switches}{start}) && (lc($role) eq "secondary"))
		{
			# Go primary.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "log_0410", variables => { 
				resource => $resource,
				role     => $role,
			}});
			my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." primary ".$resource});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output,
				return_code => $return_code,
			}});
			if ($return_code)
			{
				# Something went wrong. This is fatal, so log it as an error like every other
				# fatal path in this function.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0411", variables => { 
					resource    => $resource,
					return_code => $return_code,
					output      => $output,
				}});
				$anvil->nice_exit({exit_code => 1});
			}
		}
	}
	
	return($resource_found);
}
|
|
|
|
|
# Walks every optical image referenced by the server (the keys of 'server::optical') and confirms that
# each one exists on this host and can be read.
#
# A missing file ends the agent with OCF_ERR_INSTALLED (5). A file that exists but can't be read ends it
# with OCF_ERR_PERM (4). Returns '0' once every file has been checked.
sub validate_storage_optical
{
	my ($anvil) = @_;
	
	foreach my $iso (sort keys %{$anvil->data->{server}{optical}})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0397", variables => { file => $iso }});
		
		# Existence is tested first; readability is only tested for files that exist.
		my $exists = -e $iso;
		if (($exists) && (-r $iso))
		{
			# Present and readable, this one is fine.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0400", variables => { file => $iso }});
		}
		elsif ($exists)
		{
			# It's there, but we're not allowed to read it; OCF_ERR_PERM (4).
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0399", variables => { file => $iso }});
			$anvil->nice_exit({exit_code => 4});
		}
		else
		{
			# The file is missing entirely; OCF_ERR_INSTALLED (5).
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0398", variables => { file => $iso }});
			$anvil->nice_exit({exit_code => 5});
		}
	}
	
	return(0);
}
|
|
|
|
|
# Confirms that the emulator named in the server's definition is present on this host and is executable.
#
# The emulator path is read from the first 'emulator' entry under the first 'devices' entry of the parsed
# definition ('server::definition_xml'). A missing emulator ends the agent with OCF_ERR_INSTALLED (5), one
# that can't be executed ends it with OCF_ERR_PERM (4). Returns '0' otherwise.
sub validate_emulator
{
	my ($anvil) = @_;
	
	# Which emulator does the definition ask for?
	my $emulator_path = $anvil->data->{server}{definition_xml}->{devices}->[0]->{emulator}->[0];
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { emulator => $emulator_path }});
	
	unless (-e $emulator_path)
	{
		# Not installed here; OCF_ERR_INSTALLED (5).
		$anvil->Log->entry({
			source    => $THIS_FILE,
			line      => __LINE__,
			'print'   => 1,
			level     => 0,
			priority  => "err",
			key       => "log_0401",
			variables => {
				emulator        => $emulator_path,
				definition_file => $anvil->data->{server}{definition_file},
			},
		});
		$anvil->nice_exit({exit_code => 5});
	}
	
	unless (-x $emulator_path)
	{
		# Installed, but we may not run it; OCF_ERR_PERM (4).
		$anvil->Log->entry({
			source    => $THIS_FILE,
			line      => __LINE__,
			'print'   => 1,
			level     => 0,
			priority  => "err",
			key       => "log_0402",
			variables => { emulator => $emulator_path },
		});
		$anvil->nice_exit({exit_code => 4});
	}
	
	return(0);
}
|
|
|
|
|
# This makes sure the name we see in the definition file matches what we expect.
#
# The expected name is the 'OCF_RESKEY_name' environment value. It is compared against the first 'name'
# entry of the parsed definition ('server::definition_xml'). On a mismatch, both names are logged and the
# agent exits with '1' (OCF_ERR_GENERIC). Returns '0' when the names match.
sub validate_name
{
	my ($anvil) = @_;
	
	my $server          = $anvil->data->{environment}{OCF_RESKEY_name};
	my $definition_name = $anvil->data->{server}{definition_xml}->{name}->[0];
	if ($server ne $definition_name)
	{
		# Log the name we actually compared against, not the array reference that holds it.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0403", variables => { 
			server => $server,
			name   => $definition_name,
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	return(0);
}
|
|
|
|
|
# This checks that there is enough RAM to run this server.
#
# The RAM the server needs is read from the first 'memory' entry of the parsed definition (its content and
# 'unit' attribute) and converted to bytes. The RAM available on this host is the last column of the 'Mem:'
# line reported by 'free --bytes'. If the server needs more than is available, the shortfall is logged and
# the agent exits with '1' (OCF_ERR_GENERIC). Returns '0' otherwise.
sub validate_ram
{
	my ($anvil) = @_;
	
	# How much RAM does the server need?
	my $server_ram_value = $anvil->data->{server}{definition_xml}->{memory}->[0]->{content};
	my $server_ram_units = $anvil->data->{server}{definition_xml}->{memory}->[0]->{unit};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server_ram_value => $server_ram_value,
		server_ram_units => $server_ram_units,
	}});
	
	# Convert to bytes
	my $server_ram_bytes = $anvil->Convert->human_readable_to_bytes({size => $server_ram_value, type => $server_ram_units, base2 => 1 });
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server_ram_bytes => $server_ram_bytes." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}).")",
	}});
	
	# How much RAM do we have available? If the 'Mem:' line is never seen, this stays at '0' and the check
	# below will fail, so the return code is logged to help diagnose that case.
	my $available = 0;
	my ($free_output, $free_rc) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{free}." --bytes"});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { free_rc => $free_rc }});
	foreach my $line (split/\n/, $free_output)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		if ($line =~ /Mem:\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)$/)
		{
			my $total     = $1;
			my $used      = $2;
			my $free      = $3;
			my $shared    = $4;
			my $cache     = $5;
			   $available = $6;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				total     => $total." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $total}).")",
				used      => $used." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $used}).")",
				free      => $free." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $free}).")",
				shared    => $shared." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $shared}).")",
				cache     => $cache." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $cache}).")",
				available => $available." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $available}).")",
			}});
		}
	}
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server_ram_bytes => $anvil->Convert->add_commas({number => $server_ram_bytes})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}).")",
		available        => $anvil->Convert->add_commas({number => $available})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $available}).")",
	}});
	if ($server_ram_bytes > $available)
	{
		# Not enough free memory. Report what the server needs next to what is actually available.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0404", variables => { 
			name                => $anvil->data->{environment}{OCF_RESKEY_name},
			ram                 => $anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_bytes}),
			ram_bytes           => $anvil->Convert->add_commas({number => $server_ram_bytes}),
			available_ram       => $anvil->Convert->bytes_to_human_readable({'bytes' => $available}),
			available_ram_bytes => $anvil->Convert->add_commas({number => $available}),
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	return(0);
}
|
|
|
|
|
### TODO: Make sure the appropriate SN ports are opened. |
|
|
# This runs 'drbdadm <task> <resource>' (for example, 'up' or 'down') on the peer and then locally.
#
# The peer is the host recorded in 'server::drbd::peer::hostname'. The peer's result is logged but not acted
# on. If the local call returns a non-zero code, it is retried once per second, up to a fixed number of
# attempts, to give the resource time to settle.
#
# Parameters;
# - $anvil    - The object used for logging, shared data and system/remote calls.
# - $task     - The drbdadm sub-command to run.
# - $resource - The name of the DRBD resource to act on.
#
# Always returns '0'.
sub manage_drbd_resource
{
	my ($anvil, $task, $resource) = @_;
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		'server::drbd::peer::hostname' => $anvil->data->{server}{drbd}{peer}{hostname},
	}});
	
	# Run the task on the peer first, then here.
	my $peer_hostname = $anvil->data->{server}{drbd}{peer}{hostname};
	my $shell_call    = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		peer_hostname => $peer_hostname,
		shell_call    => $shell_call,
	}});
	my ($output, $error, $return_code) = $anvil->Remote->call({
		debug      => 2,
		shell_call => $shell_call,
		target     => $peer_hostname,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		error       => $error,
		return_code => $return_code,
	}});
	
	# Now call it locally
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code,
	}});
	
	# If the local call failed, give it a little time and try again. The number of attempts is bounded so
	# that a persistent failure can't hang the agent forever; the caller re-checks the DRBD status itself.
	my $max_retries = 5;
	my $attempt     = 0;
	while (($return_code) && ($attempt < $max_retries))
	{
		$attempt++;
		sleep 1;
		
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			attempt     => $attempt,
			output      => $output,
			return_code => $return_code,
		}});
	}
	
	return(0);
}
|
|
|
|
|
# This reads the XML definition data into an XML data hash.
#
# The definition file is '<path::directories::shared::definitions>/<OCF_RESKEY_name>.xml'. If it doesn't
# exist, the agent exits with OCF_ERR_INSTALLED (5). If it exists but can't be read, the agent exits with
# OCF_ERR_PERM (4). If it can't be parsed as XML, the parser's error is logged and the agent exits with '1'
# (OCF_ERR_GENERIC).
#
# On success, the parsed data is stored in 'server::definition_xml', the file's path is stored in
# 'server::definition_file' and '0' is returned.
sub read_server_definition
{
	my ($anvil) = @_;
	
	my $server          = $anvil->data->{environment}{OCF_RESKEY_name};
	my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml";
	my $server_xml      = "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		server          => $server,
		definition_file => $definition_file,
	}});
	
	# If the file doesn't exist, return OCF_ERR_INSTALLED (5). If the file exists but we can't read it, 
	# return OCF_ERR_PERM (4).
	if (not -e $definition_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0405", variables => { 
			definition_file => $definition_file,
			server          => $server,
		}});
		$anvil->nice_exit({exit_code => 5});
	}
	elsif (not -r $definition_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0406", variables => { 
			definition_file => $definition_file,
			server          => $server,
		}});
		$anvil->nice_exit({exit_code => 4});
	}
	
	# Still alive? Read it in.
	my $definition_xml = $anvil->Storage->read_file({file => $definition_file});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { definition_file => $definition_file }});
	
	# Parse inside an eval so a malformed file is reported instead of killing the agent outright. The
	# trailing '1' lets us detect failure from the eval's return value rather than from the truthiness
	# of '$@' alone.
	my $xml    = XML::Simple->new();
	my $parsed = eval { $server_xml = $xml->XMLin($definition_xml, KeyAttr => {}, ForceArray => 1); 1; };
	if (not $parsed)
	{
		my $problem = $@;
		   $problem = "Unknown error" if ((not defined $problem) or ($problem eq ""));
		chomp $problem;
		my $error =  "[ Error ] - There was a problem parsing: [$definition_file]. The error was:\n";
		   $error .= "===========================================================\n";
		   $error .= $problem."\n";
		   $error .= "===========================================================\n";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }});
		$anvil->nice_exit({exit_code => 1});
	}
	
	$anvil->data->{server}{definition_xml}  = $server_xml;
	$anvil->data->{server}{definition_file} = $definition_file;
	
	return(0);
}
|
|
|
|
|
# Records how this agent was invoked, logging everything at the log level given by the caller.
#
# Four things are logged, in this order;
# - Every non-empty switch, except 'raw'.
# - Every non-empty value in the 'environment' data.
# - Every '%ENV' variable whose name isn't already a key in the 'environment' data.
# - Every command line argument in '@ARGV'.
#
# Parameters;
# - $anvil - The object used for logging and shared data.
# - $level - The log level passed through to each log call.
#
# Always returns '0'.
sub show_environment
{
	my ($anvil, $level) = @_;
	
	foreach my $switch (sort keys %{$anvil->data->{switches}})
	{
		if (($switch ne "raw") && ($anvil->data->{switches}{$switch} ne ""))
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "switches::${switch}" => $anvil->data->{switches}{$switch} }});
		}
	}
	
	foreach my $variable (sort keys %{$anvil->data->{environment}})
	{
		if ($anvil->data->{environment}{$variable} ne "")
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "environment::${variable}" => $anvil->data->{environment}{$variable} }});
		}
	}
	
	foreach my $variable (sort keys %ENV)
	{
		# Anything tracked in the 'environment' data was handled by the loop above.
		unless (exists $anvil->data->{environment}{$variable})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "ENV::${variable}" => $ENV{$variable} }});
		}
	}
	
	foreach my $argument (@ARGV)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $level, list => { "ARGV" => $argument }});
	}
	
	return(0);
}
|
|
|
|
|
# Placeholder for the usage message. Nothing is printed yet; the agent simply exits with '0' (OCF_SUCCESS).
sub show_usage
{
	my $anvil = shift;
	
	### TODO: How to use this...
	
	$anvil->nice_exit({exit_code => 0});
}
|
|
|
|
|
# This prints out the metadata and exits.
#
# The resource agent's XML metadata is written to STDOUT; it describes the single 'name' parameter and the
# supported actions with their timeouts. The agent then exits with '0' (OCF_SUCCESS).
sub show_metadata
{
	my ($anvil) = @_;
	
	# This is a pretty simple agent, by design. We only take a server name for now.
	# NOTE(review): The resource-agent name below starts with 'ocs:', which looks like it may have been
	#               meant to be 'ocf:'. It is left as-is here; confirm before changing it.
	print '<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ocs:alteeve:server" version="0.1">
	<version>1.0</version>
	<longdesc lang="en">
This resource agent manages KVM+qemu virtual servers on an Anvil! m3 Intelligent Availability(tm) system.
It manages underlying components like DRBD 9 storage resources, bridge connections and so forth.
	</longdesc>
	<shortdesc lang="en">Anvil! m3 server resource agent</shortdesc>
	<parameters>
		<parameter name="name" unique="1" required="1">
			<longdesc lang="en">
This is the name of the server as reported by virsh.
			</longdesc>
			<shortdesc lang="en">Server name</shortdesc>
			<content type="string"/>
		</parameter>
	</parameters>
	<actions>
		<action name="start"        timeout="30" />
		<action name="stop"         timeout="60" />
		<action name="monitor"      timeout="10" />
		<action name="notify"       timeout="20" />
		<action name="migrate_to"   timeout="600" />
		<action name="migrate_from" timeout="600" />
		<action name="meta-data"    timeout="5" />
		<action name="validate-all" timeout="20" />
	</actions>
</resource-agent>
';
	
	$anvil->nice_exit({exit_code => 0});
}
|
|
|
|