#!/usr/bin/perl |
|
# |
|
# Author: Madison Kelly (mkelly@alteeve.ca) |
|
# Alteeve's Niche! Inc. - https://alteeve.com/w/ |
|
# Version: 0.0.1 |
|
# License: GPL v2+ |
|
# |
|
# This program ties LINBIT's DRBD fencing into pacemaker's stonith. It provides a power-fence alternative to |
|
# the default 'crm-{un,}fence-peer.sh' {un,}fence-handler. |
|
# |
|
# WARNING: This fence handler is probably not safe to use outside of an Anvil! IA platform. It makes a lot of |
|
# operational assumptions about the system and desired goals. |
|
# |
|
# Exit Codes (as per; http://lists.linbit.com/pipermail/drbd-dev/2006-November/000538.html) |
|
# - 3 -> peer is inconsistent |
|
# - 4 -> peer is outdated (this handler outdated it) [ resource fencing ] |
|
# - 5 -> peer was down / unreachable |
|
# - 6 -> peer is primary |
|
# - 7 -> peer got stonithed [ node fencing ] |
|
# ===] From crm-fence-peer.9.sh [=== |
|
# drbd_fence_peer_exit_code is per the exit code |
|
# convention of the DRBD "fence-peer" handler, |
|
# obviously. |
|
# 3: peer is already outdated or worse (e.g. inconsistent) |
|
# 4: peer has been successfully fenced |
|
# 5: peer not reachable, assumed to be dead |
|
# 6: please outdate yourself, peer is known (or likely) |
|
# to have better data, or is even currently primary. |
|
# (actually, currently it is "peer is active primary now", but I'd like to |
|
# change that meaning slightly towards the above meaning) |
|
# 7: peer has been STONITHed, thus assumed to be properly fenced |
|
# XXX IMO, this should rather be handled like 5, not 4. |
|
# ========= |
|
# |
|
# This program uses; |
|
# - 1 = Something failed |
|
# - 7 = Fence succeeded |
|
# - 255 = End of program hit... should never happen. |
|
# |
|
# TODO: |
|
# - Read the CIB; 'pcs status xml' or '/usr/sbin/cibadmin --local --query' ? |
|
# -- Map the peer's name in pacemaker. |
|
# -- Verify that stonith is enabled: |
|
# -- Verify that the node is not in maintenance mode: |
|
# -- Verify that we're quorate (a-la pacemaker): |
|
# - Verify that the resource is 'resource-and-stonith' |
|
# - Verify that the resource is 'UpToDate' (if not, should we suicide to give the other node priority, regardless of fence delay? what if both nodes have resources that are not UpToDate?) |
|
# - |
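#
# For reference, a fence-peer handler like this one is normally wired into 'drbd.conf' roughly as shown
# below. This is only a hedged sketch; the resource name and handler path are placeholders, and the exact
# placement of the 'fencing' option should be checked against drbd.conf(5) for the DRBD version in use.
#
#   resource r0 {
#       net {
#           fencing resource-and-stonith;
#       }
#       handlers {
#           fence-peer "/path/to/this/handler";
#       }
#   }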
|
|
|
### NOTE: This doesn't use Anvil::Tools on purpose. We want to be quick and depend on as few things as |
|
### possible. |
|
|
|
use strict; |
|
use warnings; |
|
use XML::Simple; |
|
use Data::Dumper;
# IO::Handle is needed for the IO::Handle->new() calls in eject_target() and get_fenced_state().
use IO::Handle;
|
|
|
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. |
|
$| = 1; |
|
|
|
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; |
|
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; |
|
if (($running_directory =~ /^\./) && ($ENV{PWD})) |
|
{ |
|
$running_directory =~ s/^\./$ENV{PWD}/; |
|
} |
|
|
|
my $conf = { |
|
'log' => { |
|
facility => "local0", |
|
level => 2, |
|
line_numbers => 1, |
|
tag => $THIS_FILE, |
|
}, |
|
# If a program isn't at the defined path, $ENV{PATH} will be searched. |
|
path => { |
|
exe => { |
|
cibadmin => "/usr/sbin/cibadmin", |
|
drbdadm => "/usr/sbin/drbdadm", |
|
getent => "/usr/bin/getent", |
|
logger => "/usr/bin/logger", |
|
stonith_admin => "/usr/sbin/stonith_admin", |
|
}, |
|
}, |
|
# The script will set this. |
|
cluster => { |
|
target_node => "", |
|
}, |
|
# These are the environment variables set by DRBD. See 'man drbd.conf' |
|
# -> 'handlers'. |
|
environment => { |
|
# The resource triggering the fence. |
|
'DRBD_RESOURCE' => defined $ENV{DRBD_RESOURCE} ? $ENV{DRBD_RESOURCE} : "", |
|
# The resource minor number, or, in the case of volumes, numbers. |
|
'DRBD_MINOR' => defined $ENV{DRBD_MINOR} ? $ENV{DRBD_MINOR} : "", |
|
# This is the address format (ipv4, ipv6, etc) |
|
'DRBD_PEER_AF' => defined $ENV{DRBD_PEER_AF} ? $ENV{DRBD_PEER_AF} : "", |
|
# This is the IP address of the target node. |
|
'DRBD_PEER_ADDRESS' => defined $ENV{DRBD_PEER_ADDRESS} ? $ENV{DRBD_PEER_ADDRESS} : "", |
|
# This isn't set |
|
'DRBD_PEERS' => defined $ENV{DRBD_PEERS} ? $ENV{DRBD_PEERS} : "", |
|
### NOTE: Below here are undocumented variables. Don't expect them to always be useful. |
|
# My node ID |
|
'DRBD_MY_NODE_ID' => defined $ENV{DRBD_MY_NODE_ID} ? $ENV{DRBD_MY_NODE_ID} : "", |
|
# The target's ID |
|
'DRBD_PEER_NODE_ID' => defined $ENV{DRBD_PEER_NODE_ID} ? $ENV{DRBD_PEER_NODE_ID} : "", |
|
}, |
|
}; |
|
|
|
# These are the full host names of the nodes given their IDs. |
|
foreach my $i (0..31) |
|
{ |
|
my $key = "DRBD_NODE_ID_".$i; |
|
if ((exists $ENV{$key}) && (defined $ENV{$key})) |
|
{ |
|
$conf->{environment}{$key} = $ENV{$key}; |
|
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => 2}); |
|
} |
|
} |
|
|
|
# Find executables. |
|
find_executables($conf); |
|
|
|
# Something for the logs |
|
to_log($conf, {message => "Attempting to fence the peer via pacemaker's stonith...", 'line' => __LINE__}); |
|
|
|
### TESTING - Simulate a call from node 1 against node 2 |
|
$conf->{environment}{DRBD_NODE_ID_0} = "m3-a01n02.alteeve.com"; |
|
$conf->{environment}{DRBD_NODE_ID_1} = "m3-a01n01.alteeve.com"; |
|
$conf->{environment}{DRBD_MINOR} = "0"; |
|
$conf->{environment}{DRBD_MY_NODE_ID} = "1"; |
|
$conf->{environment}{DRBD_PEER_ADDRESS} = "10.41.10.2"; |
|
$conf->{environment}{DRBD_PEER_AF} = "ipv4"; |
|
$conf->{environment}{DRBD_PEER_NODE_ID} = "0"; |
|
$conf->{environment}{DRBD_RESOURCE} = "r0"; |
|
### TESTING |
|
|
|
# Record the environment variables |
|
foreach my $key (sort {$a cmp $b} keys %{$conf->{environment}}) |
|
{ |
|
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => 2}); |
|
} |
|
foreach my $key (sort {$a cmp $b} keys %ENV) |
|
{ |
|
next if exists $conf->{environment}{$key}; |
|
to_log($conf, {message => "System Environment variable: [$key] -> [".$ENV{$key}."]", 'line' => __LINE__, level => 3}); |
|
} |
|
|
|
# Make sure we at least have the target's IP. |
|
if (not $conf->{environment}{DRBD_PEER_ADDRESS}) |
|
{ |
|
to_log($conf, {message => "Target's IP not set via the DRBD_PEER_ADDRESS environment variable. Unable to proceed.", 'line' => __LINE__, level => 0, priority => "err"}); |
|
exit(1); |
|
} |
|
|
|
# This also checks that we're quorate and not in maintenance mode. |
|
identify_peer($conf); |
|
|
|
# Is the peer already gone? If so, return a success. |
|
|
|
|
|
# If we're still alive, we now need to check the DRBD resource disk state locally. |
|
get_drbd_status($conf); |
|
|
|
to_log($conf, {message => "Ready to fence: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1}); |
|
exit(7); |
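### NOTE: The actual fence request still needs to be wired in here; the legacy cman/fenced logic below is
###       not reached. A minimal sketch of what a pacemaker-based call could look like is commented out
###       below. It is an assumption, not the implemented method, and the 'stonith_admin' option spelling
###       should be verified against stonith_admin(8) on the target release.
#
#	# Ask pacemaker's fencer to reboot the peer, then translate the result into this handler's exit codes.
#	my $shell_call = $conf->{path}{exe}{stonith_admin}." --reboot ".$conf->{cluster}{target_node};
#	to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 1});
#	my $output      = `$shell_call 2>&1`;
#	my $return_code = $? >> 8;
#	# '7' tells DRBD that the peer was stonithed; '1' reports a failure.
#	exit($return_code ? 1 : 7);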
|
|
|
|
|
# Eject the target, if I can. |
|
eject_target($conf); |
|
|
|
# In case cman decided by itself to fence the node... |
|
eventually_wait_for_fenced($conf); # May exit with exit code 7 |
|
|
|
# Only kill the target if graceful eject did not work. This is
|
# important because fence_node does not want to be invoked |
|
# in multiple instances in parallel. |
|
kill_target($conf); |
|
|
|
exit(255); |
|
|
|
############################################################################################################# |
|
# Functions # |
|
############################################################################################################# |
|
|
|
# This reads the status of all resources. If we're not all UpToDate, check if the peer is. If the peer is, |
|
# abort. If not, proceed (someone is going to have a bad day, but maybe some servers will live)
|
sub get_drbd_status |
|
{ |
|
my ($conf) = @_; |
|
|
|
my $resource = ""; |
|
my $peer = ""; |
|
my $local_all_uptodate = 1; |
|
my $peer_all_uptodate = 1; |
|
my $shell_call = $conf->{path}{exe}{drbdadm}." status all"; |
|
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); |
|
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; |
|
while(<$file_handle>) |
|
{ |
|
		# Parse the resource, peer and disk states out of each line of output.
|
chomp; |
|
my $line = $_; |
|
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2}); |
|
|
|
if (not $line) |
|
{ |
|
$resource = ""; |
|
$peer = ""; |
|
to_log($conf, {message => "resource: [$resource], peer: [$peer]", 'line' => __LINE__, level => 2}); |
|
next; |
|
} |
|
		if ($line =~ /^(\S+)\s+role/)
|
{ |
|
$resource = $1; |
|
to_log($conf, {message => "resource: [$resource]", 'line' => __LINE__, level => 2}); |
|
next; |
|
} |
|
if ($line =~ /^\s+(.*?) role:/) |
|
{ |
|
$peer = $1; |
|
to_log($conf, {message => "peer: [$peer]", 'line' => __LINE__, level => 2}); |
|
next; |
|
} |
|
if ($resource) |
|
{ |
|
if ($line =~ /disk:(.*)$/) |
|
{ |
|
my $local_dstate = $1; |
|
to_log($conf, {message => "local_dstate: [$local_dstate]", 'line' => __LINE__, level => 2}); |
|
if (lc($local_dstate) ne "uptodate") |
|
{ |
|
$local_all_uptodate = 0; |
|
to_log($conf, {message => "local_all_uptodate: [$local_all_uptodate]", 'line' => __LINE__, level => 2}); |
|
} |
|
next; |
|
} |
|
if ($line =~ /peer-disk:(.*)$/) |
|
{ |
|
my $peer_dstate = $1; |
|
to_log($conf, {message => "peer_dstate: [$peer_dstate]", 'line' => __LINE__, level => 2}); |
|
if (lc($peer_dstate) ne "uptodate") |
|
{ |
|
$peer_all_uptodate = 0; |
|
to_log($conf, {message => "peer: [$peer], peer_all_uptodate: [$peer_all_uptodate]", 'line' => __LINE__, level => 2}); |
|
} |
|
next; |
|
} |
|
} |
|
|
|
} |
|
close $file_handle; |
|
my $return_code = $?; |
|
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2}); |
|
|
|
# If we're not all UpToDate, but the peer is, abort |
|
if ((not $local_all_uptodate) && ($peer_all_uptodate)) |
|
{ |
|
# We're not good |
|
to_log($conf, {message => "This node has DRBD resources that are not UpToDate, but the peer is fully UpToDate. Aborting.", 'line' => __LINE__, level => 0, priority => "err"}); |
|
exit(1); |
|
} |
|
|
|
return(0); |
|
} |
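
# For reference, the parser in get_drbd_status() expects 'drbdadm status all' output shaped roughly like
# the following (resource, host and state values are illustrative only):
#
#   r0 role:Primary
#     disk:UpToDate
#     m3-a01n02.alteeve.com role:Secondary
#       peer-disk:UpToDate
#
# A blank line separates resources, which is why the loop resets 'resource' and 'peer' when it sees one.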
|
|
|
# This identifies the pacemaker name of the target node. If it can't find the target, or if this node is
# not a quorate, non-maintenance cluster member, it exits with '1'.
|
sub identify_peer |
|
{ |
|
my ($conf) = @_; |
|
|
|
# I know the target's (SN) IP, map it to a node. |
|
my $target_host = ""; |
|
my $target_ip = $conf->{environment}{DRBD_PEER_ADDRESS}; |
|
|
|
# First, can we translate the IP to a hostname? |
|
my $shell_call = $conf->{path}{exe}{getent}." hosts ".$target_ip; |
|
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); |
|
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; |
|
while(<$file_handle>) |
|
{ |
|
		# Parse the host name out of the getent output.
|
chomp; |
|
my $line = $_; |
|
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2}); |
|
if ($line =~ /^$target_ip\s+(.*)$/) |
|
{ |
|
$target_host = $1; |
|
to_log($conf, {message => ">> target_host: [$target_host]", 'line' => __LINE__, level => 2}); |
|
|
|
# Strip off any suffix, we only want the short name. |
|
$target_host =~ s/\..*//; |
|
to_log($conf, {message => "<< target_host: [$target_host]", 'line' => __LINE__, level => 2}); |
|
last; |
|
} |
|
} |
|
close $file_handle; |
|
my $return_code = $?; |
|
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2}); |
|
|
|
# If I got the host name, try to match it to a pacemaker node name. |
|
if ($target_host) |
|
{ |
|
# Get the current CIB |
|
my $xml_opened = 0; |
|
my $xml_closed = 0; |
|
my $cib = '<?xml version="1.0" encoding="UTF-8"?>'; |
|
my $shell_call = $conf->{path}{exe}{cibadmin}." --local --query"; |
|
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); |
|
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; |
|
while(<$file_handle>) |
|
{ |
|
			# Collect the CIB XML, line by line.
|
chomp; |
|
my $line = $_; |
|
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 3}); |
|
|
|
$cib .= "\n".$line; |
|
if ($line =~ /Signon to CIB failed/i) |
|
{ |
|
# Failed to connect, we're probably not in the cluster. |
|
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to get the CIB status.", 'line' => __LINE__, level => 0, priority => "err"}); |
|
exit(1); |
|
} |
|
if ($line =~ /^<cib .*?>$/) |
|
{ |
|
$xml_opened = 1; |
|
to_log($conf, {message => "xml_opened: [$xml_opened].", 'line' => __LINE__, level => 2}); |
|
} |
|
if ($line =~ /^<\/cib>$/) |
|
{ |
|
$xml_closed = 1; |
|
to_log($conf, {message => "xml_closed: [$xml_closed].", 'line' => __LINE__, level => 2}); |
|
} |
|
} |
|
close $file_handle; |
|
my $return_code = $?; |
|
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2}); |
|
|
|
# Now parse the CIB XML if I read it OK. |
|
to_log($conf, {message => "xml_opened: [$xml_opened], xml_closed: [$xml_closed].", 'line' => __LINE__, level => 2}); |
|
if (($xml_opened) && ($xml_closed)) |
|
{ |
|
# We're good |
|
my $xml = XML::Simple->new(); |
|
my $body = ""; |
|
eval { $body = $xml->XMLin($cib, KeyAttr => { language => 'name', key => 'name' }, ForceArray => [ 'id' ]) }; |
|
if ($@) |
|
{ |
|
chomp $@; |
|
				my $error = "[ Error ] - There was a problem parsing: [$cib]. The error was:\n";
|
$error .= "===========================================================\n"; |
|
$error .= $@."\n"; |
|
$error .= "===========================================================\n"; |
|
to_log($conf, {message => $error, 'line' => __LINE__, level => 0, priority => "err"}); |
|
exit(1); |
|
} |
|
else |
|
{ |
|
# Parse the XML. |
|
my $host_name = $ENV{HOSTNAME}; |
|
my $short_host_name = $ENV{HOSTNAME}; |
|
$short_host_name =~ s/\..*$//; |
|
|
|
#print "XML body: [$body]\n"; |
|
foreach my $key (sort {$a cmp $b} keys %{$body}) |
|
{ |
|
#print "Key: [$key] -> [".$body->{$key}."]\n"; |
|
} |
|
|
|
foreach my $hash_ref (sort {$a cmp $b} @{$body->{configuration}{nodes}{node}}) |
|
{ |
|
my $node = $hash_ref->{uname}; |
|
my $id = $hash_ref->{id}; |
|
if ($node =~ /^$target_host/) |
|
{ |
|
$conf->{cluster}{target_node} = $node; |
|
to_log($conf, {message => "Found the pacemaker name of the target node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1}); |
|
} |
|
elsif ($node =~ /^$short_host_name/) |
|
{ |
|
						# This is me. Am I in maintenance mode?
|
if (exists $hash_ref->{instance_attributes}) |
|
{ |
|
# We've got some data... |
|
my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : ""; |
|
my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : ""; |
|
							to_log($conf, {message => "node: [$node] instance attribute name: [$name], value: [$value]", 'line' => __LINE__, level => 1});
|
if (($name eq "maintenance") and ($value eq "on")) |
|
{ |
|
# We're in maintenance mode, abort. |
|
to_log($conf, {message => "This node is in maintenance mode. Not able to fence!", 'line' => __LINE__, level => 0, priority => "err"}); |
|
exit(1); |
|
} |
|
} |
|
#print Dumper $hash_ref; |
|
} |
|
} |
|
|
|
my $quorate = $body->{'have-quorum'}; |
|
to_log($conf, {message => "quorate: [$quorate]", 'line' => __LINE__, level => 1}); |
|
if (not $quorate) |
|
{ |
|
				to_log($conf, {message => "This node is not quorate. Refusing to fence the peer!", 'line' => __LINE__, level => 0, priority => "err"});
|
exit(1); |
|
} |
|
} |
|
} |
|
else |
|
{ |
|
# Failed to read the CIB XML. |
|
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to read the CIB XML properly.", 'line' => __LINE__, level => 2, priority => "err"}); |
|
exit(1); |
|
} |
|
} |
|
|
|
# Did I find the target? |
|
if (not $conf->{cluster}{target_node}) |
|
{ |
|
to_log($conf, {message => "Failed to find the pacemaker name of the target node. Unable to proceed!", 'line' => __LINE__, level => 0, priority => "err"}); |
|
exit(1); |
|
} |
|
|
|
return(0); |
|
} |
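
# For reference, identify_peer() expects lookups shaped roughly like the following (the IP comes from the
# DRBD_PEER_ADDRESS example above; the host name and node id are illustrative only):
#
#   # getent hosts 10.41.10.2
#   10.41.10.2      m3-a01n02.alteeve.com
#
# and, in the CIB, a matching node entry similar to:
#
#   <node id="2" uname="m3-a01n02.alteeve.com"/>
#
# The short host name from getent is matched against the start of pacemaker's 'uname' for each node.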
|
|
|
# This checks the given paths and, if something isn't found, it searches PATH trying to find it. |
|
sub find_executables |
|
{ |
|
my ($conf) = @_; |
|
|
|
# Variables. |
|
my $check = ""; |
|
my $bad = 0; |
|
|
|
# Log entries can only happen if I've found 'logger', so an extra check will be made on 'to_log' |
|
# calls. |
|
my @dirs = split/:/, $ENV{PATH}; |
|
foreach my $exe (sort {$b cmp $a} keys %{$conf->{path}{exe}}) |
|
{ |
|
if ( not -e $conf->{path}{exe}{$exe} ) |
|
{ |
|
to_log($conf, {message => "The program: [$exe] is not at: [".$conf->{path}{exe}{$exe}."]. Looking for it now...", 'line' => __LINE__, level => 1}); |
|
foreach my $path (@dirs) |
|
{ |
|
$check = "$path/$exe"; |
|
$check =~ s/\/\//\//g; |
|
to_log($conf, {message => "Checking: [$check]", 'line' => __LINE__, level => 2}); |
|
if ( -e $check ) |
|
{ |
|
if (-e $conf->{path}{exe}{logger}) |
|
{ |
|
to_log($conf, {message => "Found it! Changed path for: [$exe] from: [".$conf->{path}{exe}{$exe}."] to: [$check]", 'line' => __LINE__, level => 1}); |
|
} |
|
else |
|
{ |
|
warn "DEBUG: Found it! Changed path for: [$exe] from: [".$conf->{path}{exe}{$exe}."] to: [$check]\n"; |
|
} |
|
$conf->{path}{exe}{$exe} = $check; |
|
} |
|
else |
|
{ |
|
to_log($conf, {message => "Not found.", 'line' => __LINE__, level => 2}); |
|
} |
|
} |
|
} |
|
else |
|
{ |
|
to_log($conf, {message => "Found!", 'line' => __LINE__, level => 3}); |
|
next; |
|
} |
|
|
|
# Make sure it exists now. |
|
to_log($conf, {message => "Checking again if: [$exe] is at: [".$conf->{path}{exe}{$exe}."].", 'line' => __LINE__, level => 3}); |
|
if (not -e $conf->{path}{exe}{$exe}) |
|
{ |
|
$bad = 1; |
|
if (-e $conf->{path}{exe}{logger}) |
|
{ |
|
				to_log($conf, {message => "Failed to find executable: [$exe]. Unable to proceed.", 'line' => __LINE__, level => 0});
|
} |
|
else |
|
{ |
|
warn "Failed to find executable: [$exe]. Unable to proceed.\n"; |
|
} |
|
} |
|
} |
|
if ($bad) |
|
{ |
|
exit(1); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
# This kills remote node. |
|
sub kill_target |
|
{ |
|
my ($conf) = @_; |
|
|
|
# Variables |
|
my $remote_node = $conf->{environment}{DRBD_PEERS}; |
|
my $sc = ""; |
|
my $line = ""; |
|
my $sc_exit = ""; |
|
|
|
# Hug it and squeeze it and call it George. |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "Fencing target: [$remote_node]..."); |
|
|
|
my $shell_call = $conf->{path}{exe}{fence_node}." -v ".$remote_node; |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]"); |
|
open (my $file_handle, "$shell_call 2>&1 |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$shell_call], error was: $!"); |
|
while(<$file_handle>) |
|
{ |
|
chomp; |
|
$line = $_; |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: $line"); |
|
if ($line=~/fence .*? success/) |
|
{ |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "'fence_node $remote_node' appears to have succeeded!"); |
|
} |
|
else |
|
{ |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "'fence_node $remote_node' appears to have failed!"); |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "Read: [$line]"); |
|
} |
|
} |
|
close $file_handle; |
|
my $return_code = $?; |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to fence node: [$remote_node] exited with: [$return_code]"); |
|
|
|
# Exit. |
|
if ($return_code) |
|
{ |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "Attempt to fence: [$remote_node] failed!"); |
|
exit(1); |
|
} |
|
else |
|
{ |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "Fencing of: [$remote_node] succeeded!"); |
|
exit(7); |
|
} |
|
|
|
# This should not be reachable. |
|
return(0); |
|
} |
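
# For reference, the success check in kill_target() keys off 'fence_node -v' output shaped roughly like
# the following (node name illustrative only):
#
#   fence m3-a01n02.alteeve.com success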
|
|
|
# This ejects the remote node from the cluster, if cluster comms are still up. |
|
sub eject_target |
|
{ |
|
my ($conf) = @_; |
|
|
|
# Variables; |
|
my $remote_node = ""; |
|
my $sc = ""; |
|
my $sc_exit = ""; |
|
my $shell_call = ""; |
|
my $line = ""; |
|
|
|
### I don't know if I really want to use/keep this. |
|
# If the node is still a cluster member, kick it out. |
|
$remote_node = $conf->{environment}{DRBD_PEERS}; |
|
if ($conf->{nodes}{$remote_node}{member} eq "M") |
|
{ |
|
# It is, kick it out. If cluster comms are up, this will |
|
# trigger a fence in a few moment, regardless of what we do |
|
# next. |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "Target node: [$remote_node] is a cluster member, attempting to eject."); |
|
$sc = IO::Handle->new(); |
|
$shell_call = "$conf->{path}{exe}{cman_tool} kill -n $remote_node"; |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]"); |
|
		open ($sc, "$shell_call 2>&1 |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$shell_call], error was: $!");
|
while(<$sc>) |
|
{ |
|
chomp; |
|
$line = $_; |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: line: [$line]"); |
|
} |
|
$sc->close(); |
|
$sc_exit = $?; |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to force-remove node: [$remote_node] exited with: [$sc_exit]"); |
|
|
|
return 1; |
|
} |
|
else |
|
{ |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "Target node: [$remote_node] is *not* a cluster member (state: [$conf->{nodes}{$remote_node}{member}]). Not ejecting."); |
|
|
|
return 0; |
|
} |
|
} |
|
|
|
# Log file entries |
|
sub to_log |
|
{ |
|
my ($conf, $parameters) = @_; |
|
|
|
my $facility = defined $parameters->{facility} ? $parameters->{facility} : $conf->{'log'}{facility}; |
|
my $level = defined $parameters->{level} ? $parameters->{level} : 1; |
|
my $line = defined $parameters->{'line'} ? $parameters->{'line'} : 0; |
|
my $message = defined $parameters->{message} ? $parameters->{message} : ""; |
|
my $priority = defined $parameters->{priority} ? $parameters->{priority} : ""; |
|
|
|
# Leave if we don't care about this message |
|
return if $level > $conf->{'log'}{level}; |
|
return if not $message; |
|
|
|
	# Build the message. We prepend the line number when line numbering is enabled.
|
if (($conf->{'log'}{line_numbers}) && ($line)) |
|
{ |
|
$message = $line."; ".$message; |
|
} |
|
|
|
my $priority_string = $facility; |
|
if ($priority) |
|
{ |
|
$priority_string .= ".".$priority; |
|
} |
|
elsif ($level eq "0") |
|
{ |
|
$priority_string .= ".notice"; |
|
} |
|
elsif (($level eq "1") or ($level eq "2")) |
|
{ |
|
$priority_string .= ".info"; |
|
} |
|
else |
|
{ |
|
$priority_string .= ".debug"; |
|
} |
|
|
|
# Clean up the string for bash |
|
$message =~ s/"/\\\"/gs; |
|
$message =~ s/\(/\\\(/gs; |
|
|
|
my $shell_call = $conf->{path}{exe}{logger}." --priority ".$priority_string." --tag ".$conf->{'log'}{tag}." -- \"".$message."\""; |
|
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; |
|
while(<$file_handle>) |
|
{ |
|
# This should not generate output. |
|
chomp; |
|
my $line = $_; |
|
print "Unexpected logging output: [".$line."]\n"; |
|
} |
|
close $file_handle; |
|
|
|
return(0); |
|
} |
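
# For reference, a 'level => 1' call with line numbers enabled ends up invoking something like the
# following (the line number and message are illustrative only):
#
#   logger --priority local0.info --tag $THIS_FILE -- "255; Ready to fence: [m3-a01n02.alteeve.com]"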
|
|
|
sub get_fenced_state |
|
{ |
|
my ($conf) = @_; |
|
|
|
my $sc = IO::Handle->new(); |
|
	open($sc, "fence_tool -n ls |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [fence_tool -n ls], error was: $!\n");
|
|
|
my %fence_state; |
|
<$sc> =~ /fence domain/; |
|
$fence_state{member_count} = $1 if (<$sc> =~ /member count\s+([0-9]+)/); |
|
$fence_state{victim_count} = $1 if (<$sc> =~ /victim count\s+([0-9]+)/); |
|
$fence_state{victim_now} = $1 if (<$sc> =~ /victim now\s+([0-9]+)/); |
|
$fence_state{master_nodeid} = $1 if (<$sc> =~ /master nodeid\s+([0-9]+)/); |
|
$fence_state{wait_state} = $1 if (<$sc> =~ /wait state\s+(\w+)$/); |
|
$sc->close(); |
|
|
|
return \%fence_state; |
|
} |
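
# For reference, get_fenced_state() reads 'fence_tool -n ls' output line by line, in order, roughly like
# the following (values illustrative; the field names match the regexes above):
#
#   fence domain
#   member count  2
#   victim count  1
#   victim now    1
#   master nodeid 2
#   wait state    fencing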
|
|
|
sub wait_for_fenced_status |
|
{ |
|
my ($conf, $target_state, $time_seconds)=@_; |
|
my $fenced_state; |
|
|
|
while ($time_seconds) |
|
{ |
|
$fenced_state = get_fenced_state($conf); |
|
if ($fenced_state->{wait_state} eq $target_state) |
|
{ |
|
return $fenced_state; |
|
} |
|
sleep(1); |
|
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Waiting for $target_state. Now $fenced_state->{wait_state}\n"); |
|
$time_seconds--; |
|
} |
|
|
|
return $fenced_state; |
|
} |
|
|
|
sub eventually_wait_for_fenced |
|
{ |
|
my ($conf) = @_; |
|
|
|
my $fenced_state1 = wait_for_fenced_status($conf, "fencing", 30); |
|
|
|
if ($fenced_state1->{wait_state} ne "fencing") |
|
{ |
|
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Expected fenced to do a fence action, got $fenced_state1->{wait_state}\n");
|
return; |
|
} |
|
|
|
my $to_fence_node_id = $conf->{nodes}{$conf->{environment}{DRBD_PEERS}}{id}; |
|
if ($fenced_state1->{victim_now} != $to_fence_node_id) |
|
{ |
|
to_log(LOG_ERR(), $conf, __LINE__, "Fenced is shooting at $fenced_state1->{victim_now}; Should shoot at $to_fence_node_id\n"); |
|
} |
|
|
|
my $fenced_state2 = wait_for_fenced_status($conf, "none", 240); |
|
if ($fenced_state2->{wait_state} eq "none") |
|
{ |
|
		to_log(LOG_INFO(), $conf, __LINE__, "Seems fenced was successful\n");
|
exit 7; |
|
} |
|
else |
|
{ |
|
to_log(LOG_ERR(), $conf, __LINE__, "Fenced failed" . Dumper($fenced_state1) . Dumper($fenced_state2)); |
|
} |
|
} |
|
|
|
|