anvil/tools/fence_pacemaker

637 lines
21 KiB
Plaintext
Raw Normal View History

#!/usr/bin/perl
#
# Author: Madison Kelly (mkelly@alteeve.ca)
# Alteeve's Niche! Inc. - https://alteeve.com/w/
# Version: 0.0.1
# License: GPL v2+
#
# This program ties LINBIT's DRBD fencing into pacemaker's stonith. It provides a power-fence alternative to
# the default 'crm-{un,}fence-peer.sh' {un,}fence-handler.
#
# WARNING: This fence handler is probably not safe to use outside of an Anvil! IA platform. It makes a lot of
# operational assumptions about the system and desired goals.
#
# Exit Codes (as per; http://lists.linbit.com/pipermail/drbd-dev/2006-November/000538.html)
# - 3 -> peer is inconsistent
# - 4 -> peer is outdated (this handler outdated it) [ resource fencing ]
# - 5 -> peer was down / unreachable
# - 6 -> peer is primary
# - 7 -> peer got stonithed [ node fencing ]
# ===] From crm-fence-peer.9.sh [===
# drbd_fence_peer_exit_code is per the exit code
# convention of the DRBD "fence-peer" handler,
# obviously.
# 3: peer is already outdated or worse (e.g. inconsistent)
# 4: peer has been successfully fenced
# 5: peer not reachable, assumed to be dead
# 6: please outdate yourself, peer is known (or likely)
# to have better data, or is even currently primary.
# (actually, currently it is "peer is active primary now", but I'd like to
# change that meaning slightly towards the above meaning)
# 7: peer has been STONITHed, thus assumed to be properly fenced
# XXX IMO, this should rather be handled like 5, not 4.
# =========
#
# This program uses;
# - 1 = Something failed or end of program hit (should not happen).
# - 7 = Fence succeeded
#
# TODO:
# - Read the CIB; 'pcs status xml' or '/usr/sbin/cibadmin --local --query' ?
# -- Map the peer's name in pacemaker.
# -- Verify that stonith is enabled:
# -- Verify that the node is not in maintenance mode:
# -- Verify that we're quorate (a-la pacemaker):
# - Verify that the resource is 'resource-and-stonith'
# - Verify that the resource is 'UpToDate' (if not, should we suicide to give the other node priority, regardless of fence delay? what if both nodes have resources that are not UpToDate?)
# -
### NOTE: This doesn't use Anvil::Tools on purpose. We want to be quick and depend on as few things as
### possible.
use strict;
use warnings;
use XML::Simple;
use Data::Dumper;
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
my $conf = {
'log' => {
facility => "local0",
level => 2,
line_numbers => 1,
tag => $THIS_FILE,
},
# If a program isn't at the defined path, $ENV{PATH} will be searched.
path => {
exe => {
cibadmin => "/usr/sbin/cibadmin",
crm_error => "/usr/sbin/crm_error",
drbdadm => "/usr/sbin/drbdadm",
echo => "/usr/bin/echo",
getent => "/usr/bin/getent",
logger => "/usr/bin/logger",
stonith_admin => "/usr/sbin/stonith_admin",
},
},
# The script will set this.
cluster => {
target_node => "",
},
# These are the environment variables set by DRBD. See 'man drbd.conf'
# -> 'handlers'.
environment => {
# The resource triggering the fence.
'DRBD_RESOURCE' => defined $ENV{DRBD_RESOURCE} ? $ENV{DRBD_RESOURCE} : "",
# The resource minor number, or, in the case of volumes, numbers.
'DRBD_MINOR' => defined $ENV{DRBD_MINOR} ? $ENV{DRBD_MINOR} : "",
# This is the address format (ipv4, ipv6, etc)
'DRBD_PEER_AF' => defined $ENV{DRBD_PEER_AF} ? $ENV{DRBD_PEER_AF} : "",
# This is the IP address of the target node.
'DRBD_PEER_ADDRESS' => defined $ENV{DRBD_PEER_ADDRESS} ? $ENV{DRBD_PEER_ADDRESS} : "",
# This isn't set
'DRBD_PEERS' => defined $ENV{DRBD_PEERS} ? $ENV{DRBD_PEERS} : "",
### NOTE: Below here are undocumented variables. Don't expect them to always be useful.
# My node ID
'DRBD_MY_NODE_ID' => defined $ENV{DRBD_MY_NODE_ID} ? $ENV{DRBD_MY_NODE_ID} : "",
# The target's ID
'DRBD_PEER_NODE_ID' => defined $ENV{DRBD_PEER_NODE_ID} ? $ENV{DRBD_PEER_NODE_ID} : "",
},
};
# Find executables.
find_executables($conf);
# Something for the logs
to_log($conf, {message => "Attempting to fence the peer via pacemaker's stonith...", 'line' => __LINE__});
# These are the full host names of the nodes given their IDs.
foreach my $i (0..31)
{
my $key = "DRBD_NODE_ID_".$i;
if ((exists $ENV{$key}) && (defined $ENV{$key}))
{
$conf->{environment}{$key} = $ENV{$key};
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => 2});
}
}
# Record the environment variables
foreach my $key (sort {$a cmp $b} keys %{$conf->{environment}})
{
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => 2});
}
foreach my $key (sort {$a cmp $b} keys %ENV)
{
next if exists $conf->{environment}{$key};
to_log($conf, {message => "System Environment variable: [$key] -> [".$ENV{$key}."]", 'line' => __LINE__, level => 3});
}
# Make sure we at least have the target's IP.
if (not $conf->{environment}{DRBD_PEER_ADDRESS})
{
to_log($conf, {message => "Target's IP not set via the DRBD_PEER_ADDRESS environment variable. Unable to proceed.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# This also checks that we're quorate and not in maintenance mode.
identify_peer($conf);
# If we're still alive, we now need to check the DRBD resource disk state locally.
get_drbd_status($conf);
to_log($conf, {message => "Ready to fence: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1});
# Do the deed
kill_target($conf);
# If we hit here, something very wrong happened.
exit(1);
#############################################################################################################
# Functions #
#############################################################################################################
# This reads the status of all resources. If we're not all UpToDate, check if the peer is. If the peer is,
# abort. If not, proceed (someone is gouig to have a bad day, but maybe some servers will live)
sub get_drbd_status
{
my ($conf) = @_;
my $resource = "";
my $peer = "";
my $local_all_uptodate = 1;
my $peer_all_uptodate = 1;
my $shell_call = $conf->{path}{exe}{drbdadm}." status all";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 3});
if (not $line)
{
$resource = "";
$peer = "";
to_log($conf, {message => "resource: [$resource], peer: [$peer]", 'line' => __LINE__, level => 3});
next;
}
if ($line =~ /^(\S+)\s+role/)
{
$resource = $1;
to_log($conf, {message => "resource: [$resource]", 'line' => __LINE__, level => 3});
next;
}
if ($line =~ /^\s+(.*?) role:/)
{
$peer = $1;
to_log($conf, {message => "peer: [$peer]", 'line' => __LINE__, level => 3});
next;
}
if ($resource)
{
if ($line =~ /disk:(.*)$/)
{
my $local_dstate = $1;
$local_dstate =~ s/\s.*$//;
to_log($conf, {message => "local_dstate: [$local_dstate]", 'line' => __LINE__, level => 2});
if (lc($local_dstate) ne "uptodate")
{
$local_all_uptodate = 0;
to_log($conf, {message => "local_all_uptodate: [$local_all_uptodate]", 'line' => __LINE__, level => 2});
}
next;
}
if ($line =~ /peer-disk:(.*)$/)
{
my $peer_dstate = $1;
$peer_dstate =~ s/\s.*$//;
to_log($conf, {message => "peer: [$peer], peer_dstate: [$peer_dstate]", 'line' => __LINE__, level => 2});
if (lc($peer_dstate) ne "uptodate")
{
$peer_all_uptodate = 0;
to_log($conf, {message => "peer_all_uptodate: [$peer_all_uptodate]", 'line' => __LINE__, level => 2});
}
next;
}
}
}
close $file_handle;
my $return_code = $?;
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2});
# If we're not all UpToDate, but the peer is, abort
to_log($conf, {message => "local_all_uptodate: [$local_all_uptodate], peer_all_uptodate: [$peer_all_uptodate]", 'line' => __LINE__, level => 2});
if ((not $local_all_uptodate) && ($peer_all_uptodate))
{
# We're not good
to_log($conf, {message => "This node has DRBD resources that are not UpToDate, but the peer is fully UpToDate. Aborting.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This identifies the pacemaker name of the target node. If it can't find
sub identify_peer
{
my ($conf) = @_;
# I know the target's (SN) IP, map it to a node.
my $target_host = "";
my $target_ip = $conf->{environment}{DRBD_PEER_ADDRESS};
# First, can we translate the IP to a host name?
my $shell_call = $conf->{path}{exe}{getent}." hosts ".$target_ip;
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2});
if ($line =~ /^$target_ip\s+(.*)$/)
{
$target_host = $1;
to_log($conf, {message => ">> target_host: [$target_host]", 'line' => __LINE__, level => 2});
# Strip off any suffix, we only want the short name.
$target_host =~ s/\..*//;
to_log($conf, {message => "<< target_host: [$target_host]", 'line' => __LINE__, level => 2});
last;
}
}
close $file_handle;
my $return_code = $?;
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2});
# If I got the host name, try to match it to a pacemaker node name.
if ($target_host)
{
# Get the current CIB (in an XML::Simple hash). This will exit if it fails to read the XML
# and convert it to an XML::Simple hash.
my $body = read_cib($conf);
# Parse the XML.
my $host_name = $ENV{HOSTNAME};
my $short_host_name = $ENV{HOSTNAME};
$short_host_name =~ s/\..*$//;
foreach my $hash_ref (sort {$a cmp $b} @{$body->{configuration}{nodes}{node}})
{
my $node = $hash_ref->{uname};
my $id = $hash_ref->{id};
if ($node =~ /^$target_host/)
{
$conf->{cluster}{target_node} = $node;
to_log($conf, {message => "Found the pacemaker name of the target node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1});
}
elsif ($node =~ /^$short_host_name/)
{
# THis is me. Am I in maintenance mode?
if (exists $hash_ref->{instance_attributes})
{
# We've got some data...
my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : "";
my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : "";
to_log($conf, {message => "node: [$node] instance attribyte name: [$name], value: [$value]", 'line' => __LINE__, level => 1});
if (($name eq "maintenance") and ($value eq "on"))
{
# We're in maintenance mode, abort.
to_log($conf, {message => "This node is in maintenance mode. Not able to fence!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
}
}
my $quorate = $body->{'have-quorum'};
to_log($conf, {message => "quorate: [$quorate]", 'line' => __LINE__, level => 1});
if (not $quorate)
{
to_log($conf, {message => "This not is not quorate. Refusing to fence the peer!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# See if the target node is already out of the cluster.
foreach my $hash_ref (@{$body->{status}{node_state}})
{
my $node = $hash_ref->{uname};
my $join = $hash_ref->{'join'};
my $expected = $hash_ref->{expected};
to_log($conf, {message => "node: [$node] join: [$join], expected: [$expected]", 'line' => __LINE__, level => 3});
if ($node eq $conf->{cluster}{target_node})
{
to_log($conf, {message => "Checking the status of target node: [$node].", 'line' => __LINE__, level => 1});
if (($join eq "down") && ($expected eq "down"))
{
# The node is out.
to_log($conf, {message => "The node: [$node] is already down. No actual fence needed.", 'line' => __LINE__, level => 1});
exit(7);
}
else
{
to_log($conf, {message => "The node: [$node] is: [$join/$expected] (join/expected). Proceeding with the fence action.", 'line' => __LINE__, level => 1});
}
}
}
}
# Did I find the target?
if (not $conf->{cluster}{target_node})
{
to_log($conf, {message => "Failed to find the pacemaker name of the target node. Unable to proceed!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This reads in the CIB XML and returns it as a single multi-line string.
sub read_cib
{
my ($conf) = @_;
my $xml_opened = 0;
my $xml_closed = 0;
my $body = "";
my $cib = '<?xml version="1.0" encoding="UTF-8"?>';
my $shell_call = $conf->{path}{exe}{cibadmin}." --local --query";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 3});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 3});
$cib .= "\n".$line;
if ($line =~ /Signon to CIB failed/i)
{
# Failed to connect, we're probably not in the cluster.
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to get the CIB status.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
if ($line =~ /^<cib .*?>$/)
{
$xml_opened = 1;
to_log($conf, {message => "xml_opened: [$xml_opened].", 'line' => __LINE__, level => 3});
}
if ($line =~ /^<\/cib>$/)
{
$xml_closed = 1;
to_log($conf, {message => "xml_closed: [$xml_closed].", 'line' => __LINE__, level => 3});
}
}
close $file_handle;
my $return_code = $?;
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 3});
to_log($conf, {message => "cib: ==========\n$cib\n==========", 'line' => __LINE__, level => 3});
# Now parse the CIB XML if I read it OK.
to_log($conf, {message => "xml_opened: [$xml_opened], xml_closed: [$xml_closed].", 'line' => __LINE__, level => 3});
if (($xml_opened) && ($xml_closed))
{
# We're good
my $xml = XML::Simple->new();
eval { $body = $xml->XMLin($cib, KeyAttr => { language => 'name', key => 'name' }, ForceArray => [ 'id' ]) };
if ($@)
{
chomp $@;
my $error = "[ Error ] - The was a problem parsing: [$cib]. The error was:\n";
$error .= "===========================================================\n";
$error .= $@."\n";
$error .= "===========================================================\n";
to_log($conf, {message => $error, 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
else
{
# Failed to read the CIB XML.
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to read the CIB XML properly.", 'line' => __LINE__, level => 2, priority => "err"});
exit(1);
}
return($body);
}
# This checks the given paths and, if something isn't found, it searches PATH trying to find it.
sub find_executables
{
my ($conf) = @_;
# Variables.
my $check = "";
my $bad = 0;
# Log entries can only happen if I've found 'logger', so an extra check will be made on 'to_log'
# calls.
my @dirs = split/:/, $ENV{PATH};
foreach my $exe (sort {$b cmp $a} keys %{$conf->{path}{exe}})
{
if ( not -e $conf->{path}{exe}{$exe} )
{
to_log($conf, {message => "The program: [$exe] is not at: [".$conf->{path}{exe}{$exe}."]. Looking for it now...", 'line' => __LINE__, level => 1});
foreach my $path (@dirs)
{
$check = "$path/$exe";
$check =~ s/\/\//\//g;
to_log($conf, {message => "Checking: [$check]", 'line' => __LINE__, level => 2});
if ( -e $check )
{
if (-e $conf->{path}{exe}{logger})
{
to_log($conf, {message => "Found it! Changed path for: [$exe] from: [".$conf->{path}{exe}{$exe}."] to: [$check]", 'line' => __LINE__, level => 1});
}
else
{
warn "DEBUG: Found it! Changed path for: [$exe] from: [".$conf->{path}{exe}{$exe}."] to: [$check]\n";
}
$conf->{path}{exe}{$exe} = $check;
}
else
{
to_log($conf, {message => "Not found.", 'line' => __LINE__, level => 2});
}
}
}
else
{
to_log($conf, {message => "Found!", 'line' => __LINE__, level => 3});
next;
}
# Make sure it exists now.
to_log($conf, {message => "Checking again if: [$exe] is at: [".$conf->{path}{exe}{$exe}."].", 'line' => __LINE__, level => 3});
if (not -e $conf->{path}{exe}{$exe})
{
$bad = 1;
if (-e $conf->{path}{exe}{logger})
{
to_log($conf, {message => "Failed to find executable: [$exe]. Unable to proceed.", 'line' => __LINE__, level => 0});
}
else
{
warn "Failed to find executable: [$exe]. Unable to proceed.\n";
}
}
}
if ($bad)
{
exit(1);
}
return(0);
}
# This checks to see if 'cluster::target_node' is out of the pacemaker cluster.
sub check_peer_is_fenced
{
my ($conf) = @_;
my $fenced = 0;
my $body = read_cib($conf);
foreach my $hash_ref (@{$body->{status}{node_state}})
{
my $node = $hash_ref->{uname};
my $join = $hash_ref->{'join'};
my $expected = $hash_ref->{expected};
to_log($conf, {message => "node: [$node] join: [$join], expected: [$expected]", 'line' => __LINE__, level => 3});
if ($node eq $conf->{cluster}{target_node})
{
to_log($conf, {message => "Checking the status of target node: [$node].", 'line' => __LINE__, level => 1});
if (($join eq "down") && ($expected eq "down"))
{
# The node is out.
to_log($conf, {message => "The node: [$node] has been fenced successfully.", 'line' => __LINE__, level => 1});
exit(7);
}
else
{
to_log($conf, {message => "The node: [$node] is: [$join/$expected] (join/expected). It has not yet been fenced.", 'line' => __LINE__, level => 1});
}
}
}
return(0);
}
# This kills remote node.
sub kill_target
{
my ($conf) = @_;
# Variables
my $shell_call = $conf->{path}{exe}{stonith_admin}." --fence ".$conf->{cluster}{target_node}." --verbose; RC=\$?; ".$conf->{path}{exe}{crm_error}." \$RC; ".$conf->{path}{exe}{echo}." rc:\$RC";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2});
}
close $file_handle;
### NOTE: Should we just wait forever?
# Now loop and wait for it to be stonithed. We'll wait up to five minutes.
my $start_time = time;
my $end_time = $start_time + 300;
my $fenced = 0;
to_log($conf, {message => "start_time: [$start_time], end_time: [$end_time]", 'line' => __LINE__, level => 2});
until ($fenced)
{
# This will exit
check_peer_is_fenced($conf);
if (time > $end_time)
{
# Done waiting, failed.
to_log($conf, {message => "The node has not been fenced after five minutes. Giving up..", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
else
{
sleep 5;
}
}
# This should not be reachable.
return(0);
}
# Log file entries
sub to_log
{
my ($conf, $parameters) = @_;
my $facility = defined $parameters->{facility} ? $parameters->{facility} : $conf->{'log'}{facility};
my $level = defined $parameters->{level} ? $parameters->{level} : 1;
my $line = defined $parameters->{'line'} ? $parameters->{'line'} : 0;
my $message = defined $parameters->{message} ? $parameters->{message} : "";
my $priority = defined $parameters->{priority} ? $parameters->{priority} : "";
# Leave if we don't care about this message
return if $level > $conf->{'log'}{level};
return if not $message;
# Build the message. We log the line
if (($conf->{'log'}{line_numbers}) && ($line))
{
$message = $line."; ".$message;
}
my $priority_string = $facility;
if ($priority)
{
$priority_string .= ".".$priority;
}
elsif ($level eq "0")
{
$priority_string .= ".notice";
}
elsif (($level eq "1") or ($level eq "2"))
{
$priority_string .= ".info";
}
else
{
$priority_string .= ".debug";
}
# Clean up the string for bash
$message =~ s/"/\\\"/gs;
#$message =~ s/\(/\\\(/gs;
my $shell_call = $conf->{path}{exe}{logger}." --priority ".$priority_string." --tag ".$conf->{'log'}{tag}." -- \"".$message."\"";
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
print "Unexpected logging output: [".$line."]\n";
}
close $file_handle;
return(0);
}