* This completes the rework of DRBD triggered fencing to use / clear location constraints instead of triggering a power fence.

* Added the new unfence_pacemaker DRBD unfence handler.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 2 years ago
parent 4ba1982183
commit 4fa8d7a446
  1. 1
      Anvil/Tools.pm
  2. 42
      Anvil/Tools/DRBD.pm
  3. 3
      tools/Makefile.am
  4. 270
      tools/fence_pacemaker
  5. 646
      tools/unfence_pacemaker

@ -1274,6 +1274,7 @@ sub _set_paths
tput => "/usr/bin/tput", tput => "/usr/bin/tput",
'tr' => "/usr/bin/tr", 'tr' => "/usr/bin/tr",
uname => "/usr/bin/uname", uname => "/usr/bin/uname",
unfence_pacemaker => "/usr/sbin/unfence_pacemaker",
unzip => "/usr/bin/unzip", unzip => "/usr/bin/unzip",
useradd => "/usr/sbin/useradd", useradd => "/usr/sbin/useradd",
usermod => "/usr/sbin/usermod", usermod => "/usr/sbin/usermod",

@ -2755,6 +2755,7 @@ sub update_global_common
my $usage_count_seen = 0; my $usage_count_seen = 0;
my $udev_always_use_vnr_seen = 0; my $udev_always_use_vnr_seen = 0;
my $fence_peer_seen = 0; my $fence_peer_seen = 0;
my $unfence_peer_seen = 0;
my $auto_promote_seen = 0; my $auto_promote_seen = 0;
my $disk_flushes_seen = 0; my $disk_flushes_seen = 0;
my $md_flushes_seen = 0; my $md_flushes_seen = 0;
@ -2778,6 +2779,7 @@ sub update_global_common
# These values will be used to track where we are in processing the config file and what values are needed. # These values will be used to track where we are in processing the config file and what values are needed.
my $say_usage_count = $usage_count ? "yes" : "no"; my $say_usage_count = $usage_count ? "yes" : "no";
my $say_fence_peer = $anvil->data->{path}{exe}{fence_pacemaker}; my $say_fence_peer = $anvil->data->{path}{exe}{fence_pacemaker};
my $say_unfence_peer = $anvil->data->{path}{exe}{unfence_pacemaker};
my $say_auto_promote = "yes"; my $say_auto_promote = "yes";
my $say_flushes = $use_flushes ? "yes" : "no"; my $say_flushes = $use_flushes ? "yes" : "no";
my $say_allow_two_primaries = "no"; my $say_allow_two_primaries = "no";
@ -2865,6 +2867,17 @@ sub update_global_common
$new_global_common .= $new_line."\n"; $new_global_common .= $new_line."\n";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0518", variables => { file => $anvil->data->{path}{configs}{'global-common.conf'}, line => $line }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0518", variables => { file => $anvil->data->{path}{configs}{'global-common.conf'}, line => $line }});
} }
if (not $unfence_peer_seen)
{
$update = 1;
my $new_line = "\t\tunfence-peer ".$say_unfence_peer.";";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:update' => $update,
's2:new_line' => $new_line,
}});
$new_global_common .= $new_line."\n";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0518", variables => { file => $anvil->data->{path}{configs}{'global-common.conf'}, line => $line }});
}
} }
elsif ($in_startup) elsif ($in_startup)
{ {
@ -3115,6 +3128,35 @@ sub update_global_common
's2:new_line' => $new_line, 's2:new_line' => $new_line,
}}); }});
$new_global_common .= $new_line.$comment."\n";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0518", variables => { file => $anvil->data->{path}{configs}{'global-common.conf'}, line => $line }});
next;
}
}
if ($line =~ /(\s*)unfence-peer(\s+)(.*?)(;.*)$/)
{
my $left_space = $1;
my $middle_space = $2;
my $value = $3;
my $right_side = $4;
$unfence_peer_seen = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:left_space' => $left_space,
's2:middle_space' => $middle_space,
's3:value' => $value,
's4:right_side' => $right_side,
's5:unfence_peer_seen' => $fence_peer_seen,
}});
if ($value ne $say_unfence_peer)
{
$update = 1;
my $new_line = $left_space."unfence-peer".$middle_space.$say_unfence_peer.$right_side;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:update' => $update,
's2:new_line' => $new_line,
}});
$new_global_common .= $new_line.$comment."\n"; $new_global_common .= $new_line.$comment."\n";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0518", variables => { file => $anvil->data->{path}{configs}{'global-common.conf'}, line => $line }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0518", variables => { file => $anvil->data->{path}{configs}{'global-common.conf'}, line => $line }});
next; next;

@ -67,7 +67,8 @@ fencedir = ${FASEXECPREFIX}/sbin
dist_fence_SCRIPTS = \ dist_fence_SCRIPTS = \
fence_delay \ fence_delay \
fence_pacemaker fence_pacemaker \
unfence_pacemaker
sharedir = ${datarootdir}/anvil sharedir = ${datarootdir}/anvil

@ -75,17 +75,20 @@ my $conf = {
path => { path => {
exe => { exe => {
cibadmin => "/usr/sbin/cibadmin", cibadmin => "/usr/sbin/cibadmin",
crm_attribute => "/usr/sbin/crm_attribute",
crm_error => "/usr/sbin/crm_error", crm_error => "/usr/sbin/crm_error",
drbdadm => "/usr/sbin/drbdadm", drbdadm => "/usr/sbin/drbdadm",
echo => "/usr/bin/echo", echo => "/usr/bin/echo",
getent => "/usr/bin/getent", getent => "/usr/bin/getent",
logger => "/usr/bin/logger", logger => "/usr/bin/logger",
pcs => "/usr/sbin/pcs",
stonith_admin => "/usr/sbin/stonith_admin", stonith_admin => "/usr/sbin/stonith_admin",
}, },
}, },
# The script will set this. # The script will set this.
cluster => { cluster => {
target_node => "", target_node => "",
fence_method => "stonith", # Will change to 'constraint' if we're using a DRBD resource is passed.
}, },
# These are the environment variables set by DRBD. See 'man drbd.conf' # These are the environment variables set by DRBD. See 'man drbd.conf'
# -> 'handlers'. # -> 'handlers'.
@ -112,7 +115,7 @@ my $conf = {
find_executables($conf); find_executables($conf);
# Something for the logs # Something for the logs
to_log($conf, {message => "DRBD pacemaker's stonith handler invoked.", 'line' => __LINE__}); to_log($conf, {message => "The Anvil! DRBD fence handler invoked.", 'line' => __LINE__});
# These are the full host names of the nodes given their IDs. # These are the full host names of the nodes given their IDs.
foreach my $i (0..31) foreach my $i (0..31)
@ -122,7 +125,7 @@ foreach my $i (0..31)
{ {
$conf->{environment}{$key} = $ENV{$key}; $conf->{environment}{$key} = $ENV{$key};
my $level = $conf->{environment}{$key} eq "" ? 3 : 2; my $level = $conf->{environment}{$key} eq "" ? 3 : 2;
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => $level}); to_log($conf, {message => "DRBD Environment variable: [".$key."] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => $level});
} }
} }
@ -131,13 +134,13 @@ foreach my $key (sort {$a cmp $b} keys %{$conf->{environment}})
{ {
# $conf->{environment}{DRBD_RESOURCE} -> [srv51-Workstation3] # $conf->{environment}{DRBD_RESOURCE} -> [srv51-Workstation3]
my $level = $conf->{environment}{$key} eq "" ? 3 : 2; my $level = $conf->{environment}{$key} eq "" ? 3 : 2;
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => $level}); to_log($conf, {message => "DRBD Environment variable: [".$key."] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => $level});
} }
foreach my $key (sort {$a cmp $b} keys %ENV) foreach my $key (sort {$a cmp $b} keys %ENV)
{ {
next if exists $conf->{environment}{$key}; next if exists $conf->{environment}{$key};
my $level = $ENV{$key} eq "" ? 3 : 2; my $level = $ENV{$key} eq "" ? 3 : 2;
to_log($conf, {message => "System Environment variable: [$key] -> [".$ENV{$key}."]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "System Environment variable: [".$key."] -> [".$ENV{$key}."]", 'line' => __LINE__, level => 3});
} }
# Make sure we at least have the target's IP. # Make sure we at least have the target's IP.
@ -153,20 +156,22 @@ identify_peer($conf);
# If we're still alive, we now need to check the DRBD resource disk state locally. # If we're still alive, we now need to check the DRBD resource disk state locally.
get_drbd_status($conf); get_drbd_status($conf);
to_log($conf, {message => "Ready to fence: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1});
# Is there a specific resource? # Is there a specific resource?
if ($conf->{environment}{DRBD_RESOURCE}) if ($conf->{environment}{DRBD_RESOURCE})
{ {
# Prevent the resource from running on the peer. # Prevent the resource from running on the peer.
create_constraint($conf); to_log($conf, {message => "We're being asked to fence the specific resource: [".$conf->{environment}{DRBD_RESOURCE}."]. Switching to location constraint fencing to prevent the resource from running on: [".$conf->{cluster}{target_node}."].", 'line' => __LINE__});
$conf->{cluster}{fence_method} = "constraint";
to_log($conf, {message => "cluster::fence_method: [".$conf->{cluster}{fence_method}."].", 'line' => __LINE__, level => 2});
} }
else else
{ {
# No, do the deed to_log($conf, {message => "We were not given a specific resource to fence. Requesting pacemaker to stonith. Node: [".$conf->{cluster}{target_node}."] should power power off..", 'line' => __LINE__});
kill_target($conf);
} }
# No, do the deed
perform_fence($conf);
# If we hit here, something very wrong happened. # If we hit here, something very wrong happened.
exit(1); exit(1);
@ -180,11 +185,150 @@ sub create_constraint
{ {
my ($conf) = @_; my ($conf) = @_;
my $resource = $conf->{environment}{DRBD_RESOURCE}; my $target_server = $conf->{environment}{DRBD_RESOURCE};
my $target_node = $conf->{cluster}{target_node}; my $target_node = $conf->{cluster}{target_node};
to_log($conf, {message => "Will now create a location constraint against: [".$resource."] preventing it from running on: [".$target_node."].", 'line' => __LINE__, level => 1}); to_log($conf, {message => "Will now create a location constraint against: [".$target_server."] preventing it from running on: [".$target_node."].", 'line' => __LINE__, level => 1});
# Make sure there's a rule to apply the node attribute against.
my $rule_found = 0;
my $rule_name = "drbd-fenced_".$target_server;
my $shell_call = $conf->{path}{exe}{pcs}." constraint location config show ".$target_server;
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
if ($line =~ /Expression: $rule_name /)
{
$rule_found = 1;
to_log($conf, {message => "rule_found: [".$rule_found."]", 'line' => __LINE__, level => 2});
last;
}
}
close $file_handle;
if (not $rule_found)
{
# We can't fence.
to_log($conf, {message => "The fence rule: [".$rule_name."] was now found in the cluster, unable to fence by constraint.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# Set the node attribute
my $rule_set = 0;
$shell_call = $conf->{path}{exe}{crm_attribute}." --type nodes --node ".$target_node." --name ".$rule_name." --update 1";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open ($file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
}
close $file_handle;
# Check that the rule was set.
$rule_set = 0;
my $rule_output = "";
$shell_call = $conf->{path}{exe}{crm_attribute}." --type nodes --node ".$target_node." --name ".$rule_name." --query";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open ($file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
if (($line =~ /name=$rule_name/) && ($line =~ /value=1/))
{
$rule_set = 1;
to_log($conf, {message => "rule_set: [".$rule_set."]", 'line' => __LINE__, level => 2});
last;
}
else
{
$rule_output .= $line."\n";
}
}
close $file_handle;
if (not $rule_set)
{
# We can't fence.
$rule_output =~ s/\n$//gs;
to_log($conf, {message => "The node attribute triggering the fence rule: [".$rule_name."] against the node: [".$target_node."] appears to have not been set. Expected a string with 'name=".$rule_name." value=1' but got: [".$rule_output."].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# Place the constraint. and wait up to a minute for the target's DRBD resource to not be primary (in
# case the resource is running on the peer and needs to migrate here, which should not happen but
# best to check and be safe).
my $stop_waiting = time + 60;
my $peer_rolee = "";
my $waiting = 1;
while ($waiting)
{
# Check the peer's disk state
my $shell_call = $conf->{path}{exe}{drbdadm}." status ".$target_server;
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 3});
if ($line =~ /$target_node role:(.*)$/)
{
$peer_rolee = $1;
to_log($conf, {message => "peer_rolee: [".$peer_rolee."]", 'line' => __LINE__, level => 2});
last;
}
}
close $file_handle;
if (lc($peer_rolee) ne "primary")
{
# We're good, fence is complete.
to_log($conf, {message => "Resource: [".$target_server."] has been fenced via location constraint successfully!", 'line' => __LINE__, level => 1});
exit(7);
}
if (time > $stop_waiting)
{
# Done waiting, failed.
to_log($conf, {message => "The resource: [".$target_server."] on the fence target: [".$target_node."] is still in the 'Primary' role after a minute. Is the server running on the target? Is something else holding it open? Giving up on the location constraint fence attempt.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# Check again in a couple seconds.
sleep 2;
}
return(0);
}
sub perform_fence
{
my ($conf) = @_;
if ($conf->{cluster}{fence_method} eq "constraint")
{
create_constraint($conf);
}
else
{
kill_target($conf);
}
return(0); return(0);
} }
@ -200,32 +344,32 @@ sub get_drbd_status
my $local_all_uptodate = 1; my $local_all_uptodate = 1;
my $peer_all_uptodate = 1; my $peer_all_uptodate = 1;
my $shell_call = $conf->{path}{exe}{drbdadm}." status all"; my $shell_call = $conf->{path}{exe}{drbdadm}." status all";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>) while(<$file_handle>)
{ {
# This should not generate output. # This should not generate output.
chomp; chomp;
my $line = $_; my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 3});
if (not $line) if (not $line)
{ {
$resource = ""; $resource = "";
$peer = ""; $peer = "";
to_log($conf, {message => "resource: [$resource], peer: [$peer]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "resource: [".$resource."], peer: [".$peer."]", 'line' => __LINE__, level => 3});
next; next;
} }
if ($line =~ /^(\S+)\s+role/) if ($line =~ /^(\S+)\s+role/)
{ {
$resource = $1; $resource = $1;
to_log($conf, {message => "resource: [$resource]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "resource: [".$resource."]", 'line' => __LINE__, level => 3});
next; next;
} }
if ($line =~ /^\s+(.*?) role:/) if ($line =~ /^\s+(.*?) role:/)
{ {
$peer = $1; $peer = $1;
to_log($conf, {message => "peer: [$peer]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "peer: [".$peer."]", 'line' => __LINE__, level => 3});
next; next;
} }
if ($resource) if ($resource)
@ -234,11 +378,11 @@ sub get_drbd_status
{ {
my $local_dstate = $1; my $local_dstate = $1;
$local_dstate =~ s/\s.*$//; $local_dstate =~ s/\s.*$//;
to_log($conf, {message => "local_dstate: [$local_dstate]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "local_dstate: [".$local_dstate."]", 'line' => __LINE__, level => 2});
if (lc($local_dstate) ne "uptodate") if (lc($local_dstate) ne "uptodate")
{ {
$local_all_uptodate = 0; $local_all_uptodate = 0;
to_log($conf, {message => "local_all_uptodate: [$local_all_uptodate]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "local_all_uptodate: [".$local_all_uptodate."]", 'line' => __LINE__, level => 2});
} }
next; next;
} }
@ -246,11 +390,11 @@ sub get_drbd_status
{ {
my $peer_dstate = $1; my $peer_dstate = $1;
$peer_dstate =~ s/\s.*$//; $peer_dstate =~ s/\s.*$//;
to_log($conf, {message => "peer: [$peer], peer_dstate: [$peer_dstate]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "peer: [".$peer."], peer_dstate: [".$peer_dstate."]", 'line' => __LINE__, level => 2});
if (lc($peer_dstate) ne "uptodate") if (lc($peer_dstate) ne "uptodate")
{ {
$peer_all_uptodate = 0; $peer_all_uptodate = 0;
to_log($conf, {message => "peer_all_uptodate: [$peer_all_uptodate]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "peer_all_uptodate: [".$peer_all_uptodate."]", 'line' => __LINE__, level => 2});
} }
next; next;
} }
@ -258,11 +402,9 @@ sub get_drbd_status
} }
close $file_handle; close $file_handle;
my $return_code = $?;
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2});
# If we're not all UpToDate, but the peer is, abort # If we're not all UpToDate, but the peer is, abort
to_log($conf, {message => "local_all_uptodate: [$local_all_uptodate], peer_all_uptodate: [$peer_all_uptodate]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "local_all_uptodate: [".$local_all_uptodate."], peer_all_uptodate: [".$peer_all_uptodate."]", 'line' => __LINE__, level => 2});
if ((not $local_all_uptodate) && ($peer_all_uptodate)) if ((not $local_all_uptodate) && ($peer_all_uptodate))
{ {
# We're not good # We're not good
@ -284,30 +426,28 @@ sub identify_peer
# First, can we translate the IP to a host name? # First, can we translate the IP to a host name?
my $shell_call = $conf->{path}{exe}{getent}." hosts ".$target_ip; my $shell_call = $conf->{path}{exe}{getent}." hosts ".$target_ip;
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>) while(<$file_handle>)
{ {
# This should not generate output. # This should not generate output.
chomp; chomp;
my $line = $_; my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
if ($line =~ /^$target_ip\s+(.*)$/) if ($line =~ /^$target_ip\s+(.*)$/)
{ {
# This could be multiple names. # This could be multiple names.
$target_host = $1; $target_host = $1;
to_log($conf, {message => "target_host: [$target_host]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "target_host: [".$target_host."]", 'line' => __LINE__, level => 2});
#to_log($conf, {message => ">> target_host: [$target_host]", 'line' => __LINE__, level => 2}); #to_log($conf, {message => ">> target_host: [".$target_host."]", 'line' => __LINE__, level => 2});
# Strip off any suffix, we only want the short name. # Strip off any suffix, we only want the short name.
#$target_host =~ s/\..*//; #$target_host =~ s/\..*//;
#to_log($conf, {message => "<< target_host: [$target_host]", 'line' => __LINE__, level => 2}); #to_log($conf, {message => "<< target_host: [".$target_host."]", 'line' => __LINE__, level => 2});
#last; #last;
} }
} }
close $file_handle; close $file_handle;
my $return_code = $?;
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 2});
# If I got the host name, try to match it to a pacemaker node name. # If I got the host name, try to match it to a pacemaker node name.
if ($target_host) if ($target_host)
@ -320,18 +460,18 @@ sub identify_peer
my $host_name = $ENV{HOSTNAME}; my $host_name = $ENV{HOSTNAME};
my $short_host_name = $ENV{HOSTNAME}; my $short_host_name = $ENV{HOSTNAME};
$short_host_name =~ s/\..*$//; $short_host_name =~ s/\..*$//;
to_log($conf, {message => "host_name: [$host_name], short_host_name: [".$short_host_name."]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "host_name: [".$host_name."], short_host_name: [".$short_host_name."]", 'line' => __LINE__, level => 2});
foreach my $hash_ref (sort {$a cmp $b} @{$body->{configuration}{nodes}{node}}) foreach my $hash_ref (sort {$a cmp $b} @{$body->{configuration}{nodes}{node}})
{ {
my $node = $hash_ref->{uname}; my $node = $hash_ref->{uname};
my $id = $hash_ref->{id}; my $id = $hash_ref->{id};
to_log($conf, {message => "node: [$node], id: [$id]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "node: [".$node."], id: [".$id."]", 'line' => __LINE__, level => 2});
foreach my $target_name (split/ /, $target_host) foreach my $target_name (split/ /, $target_host)
{ {
to_log($conf, {message => ">> target_name: [$target_name]", 'line' => __LINE__, level => 2}); to_log($conf, {message => ">> target_name: [".$target_name."]", 'line' => __LINE__, level => 2});
$target_name =~ s/\..*//; $target_name =~ s/\..*//;
to_log($conf, {message => "<< target_name: [$target_name]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "<< target_name: [".$target_name."]", 'line' => __LINE__, level => 2});
if ($node =~ /^$target_name/) if ($node =~ /^$target_name/)
{ {
$conf->{cluster}{target_node} = $node; $conf->{cluster}{target_node} = $node;
@ -345,7 +485,7 @@ sub identify_peer
# We've got some data... # We've got some data...
my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : ""; my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : "";
my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : ""; my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : "";
to_log($conf, {message => "node: [$node] instance attribyte name: [$name], value: [$value]", 'line' => __LINE__, level => 1}); to_log($conf, {message => "node: [".$node."] instance attribyte name: [".$name."], value: [".$value."]", 'line' => __LINE__, level => 1});
if (($name eq "maintenance") and ($value eq "on")) if (($name eq "maintenance") and ($value eq "on"))
{ {
# We're in maintenance mode, abort. # We're in maintenance mode, abort.
@ -358,7 +498,7 @@ sub identify_peer
} }
my $quorate = $body->{'have-quorum'}; my $quorate = $body->{'have-quorum'};
to_log($conf, {message => "quorate: [$quorate]", 'line' => __LINE__, level => 1}); to_log($conf, {message => "quorate: [".$quorate."]", 'line' => __LINE__, level => 1});
if (not $quorate) if (not $quorate)
{ {
to_log($conf, {message => "This not is not quorate. Refusing to fence the peer!", 'line' => __LINE__, level => 0, priority => "err"}); to_log($conf, {message => "This not is not quorate. Refusing to fence the peer!", 'line' => __LINE__, level => 0, priority => "err"});
@ -371,19 +511,19 @@ sub identify_peer
my $node = $hash_ref->{uname}; my $node = $hash_ref->{uname};
my $join = $hash_ref->{'join'}; my $join = $hash_ref->{'join'};
my $expected = $hash_ref->{expected}; my $expected = $hash_ref->{expected};
to_log($conf, {message => "node: [$node] join: [$join], expected: [$expected]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "node: [".$node."] join: [".$join."], expected: [".$expected."]", 'line' => __LINE__, level => 3});
if ($node eq $conf->{cluster}{target_node}) if ($node eq $conf->{cluster}{target_node})
{ {
to_log($conf, {message => "Checking the status of target node: [$node].", 'line' => __LINE__, level => 1}); to_log($conf, {message => "Checking the status of target node: [".$node."].", 'line' => __LINE__, level => 1});
if (($join eq "down") && ($expected eq "down")) if (($join eq "down") && ($expected eq "down"))
{ {
# The node is out. # The node is out.
to_log($conf, {message => "The node: [$node] is already down. No actual fence needed.", 'line' => __LINE__, level => 1}); to_log($conf, {message => "The node: [".$node."] is already down. No actual fence needed.", 'line' => __LINE__, level => 1});
exit(7); exit(7);
} }
else else
{ {
to_log($conf, {message => "The node: [$node] is: [$join/$expected] (join/expected). Proceeding with the fence action.", 'line' => __LINE__, level => 1}); to_log($conf, {message => "The node: [".$node."] is: [".$join."/".$expected."] (join/expected). Proceeding with the fence action.", 'line' => __LINE__, level => 1});
} }
} }
} }
@ -409,14 +549,14 @@ sub read_cib
my $body = ""; my $body = "";
my $cib = '<?xml version="1.0" encoding="UTF-8"?>'; my $cib = '<?xml version="1.0" encoding="UTF-8"?>';
my $shell_call = $conf->{path}{exe}{cibadmin}." --local --query"; my $shell_call = $conf->{path}{exe}{cibadmin}." --local --query";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 3});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>) while(<$file_handle>)
{ {
# This should not generate output. # This should not generate output.
chomp; chomp;
my $line = $_; my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 3});
$cib .= "\n".$line; $cib .= "\n".$line;
if ($line =~ /Signon to CIB failed/i) if ($line =~ /Signon to CIB failed/i)
@ -428,21 +568,19 @@ sub read_cib
if ($line =~ /^<cib .*?>$/) if ($line =~ /^<cib .*?>$/)
{ {
$xml_opened = 1; $xml_opened = 1;
to_log($conf, {message => "xml_opened: [$xml_opened].", 'line' => __LINE__, level => 3}); to_log($conf, {message => "xml_opened: [".$xml_opened."].", 'line' => __LINE__, level => 3});
} }
if ($line =~ /^<\/cib>$/) if ($line =~ /^<\/cib>$/)
{ {
$xml_closed = 1; $xml_closed = 1;
to_log($conf, {message => "xml_closed: [$xml_closed].", 'line' => __LINE__, level => 3}); to_log($conf, {message => "xml_closed: [".$xml_closed."].", 'line' => __LINE__, level => 3});
} }
} }
close $file_handle; close $file_handle;
my $return_code = $?; to_log($conf, {message => "cib: ==========\n".$cib."\n==========", 'line' => __LINE__, level => 3});
to_log($conf, {message => "Return code: [$return_code]", 'line' => __LINE__, level => 3});
to_log($conf, {message => "cib: ==========\n$cib\n==========", 'line' => __LINE__, level => 3});
# Now parse the CIB XML if I read it OK. # Now parse the CIB XML if I read it OK.
to_log($conf, {message => "xml_opened: [$xml_opened], xml_closed: [$xml_closed].", 'line' => __LINE__, level => 3}); to_log($conf, {message => "xml_opened: [".$xml_opened."], xml_closed: [".$xml_closed."].", 'line' => __LINE__, level => 3});
if (($xml_opened) && ($xml_closed)) if (($xml_opened) && ($xml_closed))
{ {
# We're good # We're good
@ -452,7 +590,7 @@ sub read_cib
if (not $test) if (not $test)
{ {
chomp $@; chomp $@;
my $error = "[ Error ] - The was a problem parsing: [$cib]. The error was:\n"; my $error = "[ Error ] - The was a problem parsing: [".$cib."]. The error was:\n";
$error .= "===========================================================\n"; $error .= "===========================================================\n";
$error .= $@."\n"; $error .= $@."\n";
$error .= "===========================================================\n"; $error .= "===========================================================\n";
@ -486,21 +624,21 @@ sub find_executables
{ {
if ( not -e $conf->{path}{exe}{$exe} ) if ( not -e $conf->{path}{exe}{$exe} )
{ {
to_log($conf, {message => "The program: [$exe] is not at: [".$conf->{path}{exe}{$exe}."]. Looking for it now...", 'line' => __LINE__, level => 1}); to_log($conf, {message => "The program: [".$exe."] is not at: [".$conf->{path}{exe}{$exe}."]. Looking for it now...", 'line' => __LINE__, level => 1});
foreach my $path (@dirs) foreach my $path (@dirs)
{ {
$check = "$path/$exe"; $check = "$path/$exe";
$check =~ s/\/\//\//g; $check =~ s/\/\//\//g;
to_log($conf, {message => "Checking: [$check]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Checking: [".$check."]", 'line' => __LINE__, level => 2});
if ( -e $check ) if ( -e $check )
{ {
if (-e $conf->{path}{exe}{logger}) if (-e $conf->{path}{exe}{logger})
{ {
to_log($conf, {message => "Found it! Changed path for: [$exe] from: [".$conf->{path}{exe}{$exe}."] to: [$check]", 'line' => __LINE__, level => 1}); to_log($conf, {message => "Found it! Changed path for: [".$exe."] from: [".$conf->{path}{exe}{$exe}."] to: [".$check."]", 'line' => __LINE__, level => 1});
} }
else else
{ {
warn "DEBUG: Found it! Changed path for: [$exe] from: [".$conf->{path}{exe}{$exe}."] to: [$check]\n"; warn "DEBUG: Found it! Changed path for: [".$exe."] from: [".$conf->{path}{exe}{$exe}."] to: [".$check."]\n";
} }
$conf->{path}{exe}{$exe} = $check; $conf->{path}{exe}{$exe} = $check;
} }
@ -517,17 +655,17 @@ sub find_executables
} }
# Make sure it exists now. # Make sure it exists now.
to_log($conf, {message => "Checking again if: [$exe] is at: [".$conf->{path}{exe}{$exe}."].", 'line' => __LINE__, level => 3}); to_log($conf, {message => "Checking again if: [".$exe."] is at: [".$conf->{path}{exe}{$exe}."].", 'line' => __LINE__, level => 3});
if (not -e $conf->{path}{exe}{$exe}) if (not -e $conf->{path}{exe}{$exe})
{ {
$bad = 1; $bad = 1;
if (-e $conf->{path}{exe}{logger}) if (-e $conf->{path}{exe}{logger})
{ {
to_log($conf, {message => "Failed to find executable: [$exe]. Unable to proceed.", 'line' => __LINE__, level => 0}); to_log($conf, {message => "Failed to find executable: [".$exe."]. Unable to proceed.", 'line' => __LINE__, level => 0});
} }
else else
{ {
warn "Failed to find executable: [$exe]. Unable to proceed.\n"; warn "Failed to find executable: [".$exe."]. Unable to proceed.\n";
} }
} }
} }
@ -551,27 +689,27 @@ sub check_peer_is_fenced
my $node = $hash_ref->{uname}; my $node = $hash_ref->{uname};
my $join = $hash_ref->{'join'}; my $join = $hash_ref->{'join'};
my $expected = $hash_ref->{expected}; my $expected = $hash_ref->{expected};
to_log($conf, {message => "node: [$node] join: [$join], expected: [$expected]", 'line' => __LINE__, level => 3}); to_log($conf, {message => "node: [".$node."] join: [".$join."], expected: [".$expected."]", 'line' => __LINE__, level => 3});
if ($node eq $conf->{cluster}{target_node}) if ($node eq $conf->{cluster}{target_node})
{ {
to_log($conf, {message => "Checking the status of target node: [$node].", 'line' => __LINE__, level => 1}); to_log($conf, {message => "Checking the status of target node: [".$node."].", 'line' => __LINE__, level => 1});
if (($join eq "down") && ($expected eq "down")) if (($join eq "down") && ($expected eq "down"))
{ {
# The node is out. # The node is out.
to_log($conf, {message => "The node: [$node] has been fenced successfully.", 'line' => __LINE__, level => 1}); to_log($conf, {message => "The node: [".$node."] has been fenced successfully.", 'line' => __LINE__, level => 1});
# Call 'drbdadm adjust all' as it seems like drbd's in-memory can change # Call 'drbdadm adjust all' as it seems like drbd's in-memory can change
# causing 'incompatible <fence option>' on return of the peer. # causing 'incompatible <fence option>' on return of the peer.
to_log($conf, {message => "Reloading DRBD config from disk to ensure in-memory and on-disk configs match.", 'line' => __LINE__, level => 1}); to_log($conf, {message => "Reloading DRBD config from disk to ensure in-memory and on-disk configs match.", 'line' => __LINE__, level => 1});
my $shell_call = $conf->{path}{exe}{drbdadm}." adjust all"; my $shell_call = $conf->{path}{exe}{drbdadm}." adjust all";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>) while(<$file_handle>)
{ {
# This should not generate output. # This should not generate output.
chomp; chomp;
my $line = $_; my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
} }
close $file_handle; close $file_handle;
to_log($conf, {message => "Fence completed successfully!", 'line' => __LINE__, level => 1}); to_log($conf, {message => "Fence completed successfully!", 'line' => __LINE__, level => 1});
@ -580,7 +718,7 @@ sub check_peer_is_fenced
} }
else else
{ {
to_log($conf, {message => "The node: [$node] is: [$join/$expected] (join/expected). It has not yet been fenced.", 'line' => __LINE__, level => 1}); to_log($conf, {message => "The node: [".$node."] is: [".$join."/".$expected."] (join/expected). It has not yet been fenced.", 'line' => __LINE__, level => 1});
} }
} }
} }
@ -595,14 +733,14 @@ sub kill_target
# Variables # Variables
my $shell_call = $conf->{path}{exe}{stonith_admin}." --fence ".$conf->{cluster}{target_node}." --verbose; RC=\$?; ".$conf->{path}{exe}{crm_error}." \$RC; ".$conf->{path}{exe}{echo}." rc:\$RC"; my $shell_call = $conf->{path}{exe}{stonith_admin}." --fence ".$conf->{cluster}{target_node}." --verbose; RC=\$?; ".$conf->{path}{exe}{crm_error}." \$RC; ".$conf->{path}{exe}{echo}." rc:\$RC";
to_log($conf, {message => "Calling: [$shell_call]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n"; open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>) while(<$file_handle>)
{ {
# This should not generate output. # This should not generate output.
chomp; chomp;
my $line = $_; my $line = $_;
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
} }
close $file_handle; close $file_handle;
@ -611,7 +749,7 @@ sub kill_target
my $start_time = time; my $start_time = time;
my $end_time = $start_time + 300; my $end_time = $start_time + 300;
my $fenced = 0; my $fenced = 0;
to_log($conf, {message => "start_time: [$start_time], end_time: [$end_time]", 'line' => __LINE__, level => 2}); to_log($conf, {message => "start_time: [".$start_time."], end_time: [".$end_time."]", 'line' => __LINE__, level => 2});
until ($fenced) until ($fenced)
{ {
# This will exit # This will exit
@ -619,7 +757,7 @@ sub kill_target
if (time > $end_time) if (time > $end_time)
{ {
# Done waiting, failed. # Done waiting, failed.
to_log($conf, {message => "The node has not been fenced after five minutes. Giving up..", 'line' => __LINE__, level => 0, priority => "err"}); to_log($conf, {message => "The node has not been fenced after five minutes. Giving up.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1); exit(1);
} }
else else

@ -0,0 +1,646 @@
#!/usr/bin/perl
#
# Author: Madison Kelly (mkelly@alteeve.ca)
# Alteeve's Niche! Inc. - https://alteeve.com/w/
# Version: 0.0.1
# License: GPL v2+
#
# This program ties LINBIT's DRBD fencing into pacemaker's stonith. It provides a power-fence alternative to
# the default 'crm-{un,}fence-peer.sh' {un,}fence-handler.
#
# WARNING: This unfence handler is probably not safe to use outside of an Anvil! IA platform. It makes a lot
# of operational assumptions about the system and desired goals.
#
# TODO:
# -
### NOTE: This doesn't use Anvil::Tools on purpose. We want to be quick and depend on as few things as
### possible.
use strict;
use warnings;
use XML::Simple;
use Data::Dumper;
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
my $conf = {
'log' => {
facility => "local0",
level => 2,
line_numbers => 1,
tag => $THIS_FILE,
},
# If a program isn't at the defined path, $ENV{PATH} will be searched.
path => {
exe => {
cibadmin => "/usr/sbin/cibadmin",
crm_attribute => "/usr/sbin/crm_attribute",
crm_error => "/usr/sbin/crm_error",
drbdadm => "/usr/sbin/drbdadm",
echo => "/usr/bin/echo",
getent => "/usr/bin/getent",
logger => "/usr/bin/logger",
pcs => "/usr/sbin/pcs",
stonith_admin => "/usr/sbin/stonith_admin",
},
},
# The script will set this.
cluster => {
target_node => "",
},
# These are the environment variables set by DRBD. See 'man drbd.conf'
# -> 'handlers'.
environment => {
# The resource triggering the fence.
'DRBD_RESOURCE' => defined $ENV{DRBD_RESOURCE} ? $ENV{DRBD_RESOURCE} : "",
# The resource minor number, or, in the case of volumes, numbers.
'DRBD_MINOR' => defined $ENV{DRBD_MINOR} ? $ENV{DRBD_MINOR} : "",
# This is the address format (ipv4, ipv6, etc)
'DRBD_PEER_AF' => defined $ENV{DRBD_PEER_AF} ? $ENV{DRBD_PEER_AF} : "",
# This is the IP address of the target node.
'DRBD_PEER_ADDRESS' => defined $ENV{DRBD_PEER_ADDRESS} ? $ENV{DRBD_PEER_ADDRESS} : "",
# This isn't set
'DRBD_PEERS' => defined $ENV{DRBD_PEERS} ? $ENV{DRBD_PEERS} : "",
### NOTE: Below here are undocumented variables. Don't expect them to always be useful.
# My node ID
'DRBD_MY_NODE_ID' => defined $ENV{DRBD_MY_NODE_ID} ? $ENV{DRBD_MY_NODE_ID} : "",
# The target's ID
'DRBD_PEER_NODE_ID' => defined $ENV{DRBD_PEER_NODE_ID} ? $ENV{DRBD_PEER_NODE_ID} : "",
},
};
# Find executables.
find_executables($conf);
# Something for the logs
to_log($conf, {message => "The Anvil! DRBD unfence handler has been invoked.", 'line' => __LINE__});
# These are the full host names of the nodes given their IDs.
foreach my $i (0..31)
{
my $key = "DRBD_NODE_ID_".$i;
if ((exists $ENV{$key}) && (defined $ENV{$key}))
{
$conf->{environment}{$key} = $ENV{$key};
my $level = $conf->{environment}{$key} eq "" ? 3 : 2;
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => $level});
}
}
# Record the environment variables
foreach my $key (sort {$a cmp $b} keys %{$conf->{environment}})
{
# $conf->{environment}{DRBD_RESOURCE} -> [srv51-Workstation3]
my $level = $conf->{environment}{$key} eq "" ? 3 : 2;
to_log($conf, {message => "DRBD Environment variable: [$key] -> [".$conf->{environment}{$key}."]", 'line' => __LINE__, level => $level});
}
foreach my $key (sort {$a cmp $b} keys %ENV)
{
next if exists $conf->{environment}{$key};
my $level = $ENV{$key} eq "" ? 3 : 2;
to_log($conf, {message => "System Environment variable: [$key] -> [".$ENV{$key}."]", 'line' => __LINE__, level => 3});
}
# Make sure we at least have the target's IP.
if (not $conf->{environment}{DRBD_PEER_ADDRESS})
{
to_log($conf, {message => "Called without target's IP. Nothing to do, exiting. Were we called by 'pcs stonith list'?", 'line' => __LINE__, level => 1, priority => "alert"});
exit(1);
}
# This also checks that we're quorate and not in maintenance mode.
identify_peer($conf);
# If we're still alive, we now need to check the DRBD resource disk state locally.
get_drbd_status($conf);
# Is there a specific resource?
if ($conf->{environment}{DRBD_RESOURCE})
{
# Prevent the resource from running on the peer.
to_log($conf, {message => "We're being asked to unfence the specific resource: [".$conf->{environment}{DRBD_RESOURCE}."]. Will remove the node attribute blocking this server from running on: [".$conf->{cluster}{target_node}."].", 'line' => __LINE__});
remove_constraint($conf);
}
else
{
to_log($conf, {message => "We were not given a specific resource to unfence. Nothing to do.", 'line' => __LINE__});
exit(0);
}
# If we hit here, something very wrong happened.
exit(1);
#############################################################################################################
# Functions #
#############################################################################################################
# This removes a location constraint that prevents the resource / server from running on the peer node.
sub remove_constraint
{
my ($conf) = @_;
my $target_server = $conf->{environment}{DRBD_RESOURCE};
my $target_node = $conf->{cluster}{target_node};
to_log($conf, {message => "Will now create a location constraint against: [".$target_server."] preventing it from running on: [".$target_node."].", 'line' => __LINE__, level => 1});
# Check that the rule was set.
my $rule_name = "drbd-fenced_".$target_server;
my $rule_set = 1;
my $rule_found = 0;
my $rule_output = "";
my $shell_call = $conf->{path}{exe}{crm_attribute}." --type nodes --node ".$target_node." --name ".$rule_name." --query";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
if (($line =~ /name=$rule_name/) && ($line =~ /value=0/))
{
$rule_set = 0;
to_log($conf, {message => "rule_set: [".$rule_set."]", 'line' => __LINE__, level => 2});
last;
}
else
{
$rule_output .= $line."\n";
}
}
close $file_handle;
if (not $rule_set)
{
# No need to unfence.
to_log($conf, {message => "The node attribute rule: [".$rule_name."] against the node: [".$target_node."] was not found. No need to unfence.", 'line' => __LINE__, level => 0, priority => "err"});
exit(0);
}
# Clear the node attribute
$shell_call = $conf->{path}{exe}{crm_attribute}." --type nodes --node ".$target_node." --name ".$rule_name." --update 0";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open ($file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 3});
}
close $file_handle;
# Check that the rule was set.
$rule_output = "";
$shell_call = $conf->{path}{exe}{crm_attribute}." --type nodes --node ".$target_node." --name ".$rule_name." --query";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open ($file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
if (($line =~ /name=$rule_name/) && ($line =~ /value=0/))
{
$rule_set = 0;
to_log($conf, {message => "rule_set: [".$rule_set."]", 'line' => __LINE__, level => 2});
last;
}
else
{
$rule_output .= $line."\n";
}
}
close $file_handle;
if (not $rule_set)
{
# Success!
to_log($conf, {message => "The node attribute rule: [".$rule_name."] against the node: [".$target_node."] has been cleared successfully.", 'line' => __LINE__, level => 0, priority => "err"});
exit(0);
}
else
{
# Failed.
$rule_output =~ s/\n$//gs;
to_log($conf, {message => "The node attribute triggering the fence rule: [".$rule_name."] against the node: [".$target_node."] appears to have not been cleared. Expected a string with 'name=".$rule_name." value=0' but got: [".$rule_output."].", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This reads the status of all resources. If we're not all UpToDate, check if the peer is. If the peer is,
# abort. If not, proceed (someone is gouig to have a bad day, but maybe some servers will live)
sub get_drbd_status
{
my ($conf) = @_;
my $resource = "";
my $peer = "";
my $local_all_uptodate = 1;
my $peer_all_uptodate = 1;
my $shell_call = $conf->{path}{exe}{drbdadm}." status all";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 3});
if (not $line)
{
$resource = "";
$peer = "";
to_log($conf, {message => "resource: [".$resource."], peer: [".$peer."]", 'line' => __LINE__, level => 3});
next;
}
if ($line =~ /^(\S+)\s+role/)
{
$resource = $1;
to_log($conf, {message => "resource: [".$resource."]", 'line' => __LINE__, level => 3});
next;
}
if ($line =~ /^\s+(.*?) role:/)
{
$peer = $1;
to_log($conf, {message => "peer: [".$peer."]", 'line' => __LINE__, level => 3});
next;
}
if ($resource)
{
if ($line =~ /disk:(.*)$/)
{
my $local_dstate = $1;
$local_dstate =~ s/\s.*$//;
to_log($conf, {message => "local_dstate: [".$local_dstate."]", 'line' => __LINE__, level => 2});
if (lc($local_dstate) ne "uptodate")
{
$local_all_uptodate = 0;
to_log($conf, {message => "local_all_uptodate: [".$local_all_uptodate."]", 'line' => __LINE__, level => 2});
}
next;
}
if ($line =~ /peer-disk:(.*)$/)
{
my $peer_dstate = $1;
$peer_dstate =~ s/\s.*$//;
to_log($conf, {message => "peer: [".$peer."], peer_dstate: [".$peer_dstate."]", 'line' => __LINE__, level => 2});
if (lc($peer_dstate) ne "uptodate")
{
$peer_all_uptodate = 0;
to_log($conf, {message => "peer_all_uptodate: [".$peer_all_uptodate."]", 'line' => __LINE__, level => 2});
}
next;
}
}
}
close $file_handle;
# If we're not all UpToDate, but the peer is, abort
to_log($conf, {message => "local_all_uptodate: [".$local_all_uptodate."], peer_all_uptodate: [".$peer_all_uptodate."]", 'line' => __LINE__, level => 2});
if ((not $local_all_uptodate) && ($peer_all_uptodate))
{
# We're not good
to_log($conf, {message => "This node has DRBD resources that are not UpToDate, but the peer is fully UpToDate. Aborting.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This identifies the pacemaker name of the target node. If it can't find the peer, it exits with '1'.
sub identify_peer
{
my ($conf) = @_;
# I know the target's (SN) IP, map it to a node.
my $target_host = "";
my $target_ip = $conf->{environment}{DRBD_PEER_ADDRESS};
# First, can we translate the IP to a host name?
my $shell_call = $conf->{path}{exe}{getent}." hosts ".$target_ip;
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
if ($line =~ /^$target_ip\s+(.*)$/)
{
# This could be multiple names.
$target_host = $1;
to_log($conf, {message => "target_host: [".$target_host."]", 'line' => __LINE__, level => 2});
#to_log($conf, {message => ">> target_host: [".$target_host."]", 'line' => __LINE__, level => 2});
# Strip off any suffix, we only want the short name.
#$target_host =~ s/\..*//;
#to_log($conf, {message => "<< target_host: [".$target_host."]", 'line' => __LINE__, level => 2});
#last;
}
}
close $file_handle;
# If I got the host name, try to match it to a pacemaker node name.
if ($target_host)
{
# Get the current CIB (in an XML::Simple hash). This will exit if it fails to read the XML
# and convert it to an XML::Simple hash.
my $body = read_cib($conf);
# Parse the XML.
my $host_name = $ENV{HOSTNAME};
my $short_host_name = $ENV{HOSTNAME};
$short_host_name =~ s/\..*$//;
to_log($conf, {message => "host_name: [".$host_name."], short_host_name: [".$short_host_name."]", 'line' => __LINE__, level => 2});
foreach my $hash_ref (sort {$a cmp $b} @{$body->{configuration}{nodes}{node}})
{
my $node = $hash_ref->{uname};
my $id = $hash_ref->{id};
to_log($conf, {message => "node: [".$node."], id: [".$id."]", 'line' => __LINE__, level => 2});
foreach my $target_name (split/ /, $target_host)
{
to_log($conf, {message => ">> target_name: [".$target_name."]", 'line' => __LINE__, level => 2});
$target_name =~ s/\..*//;
to_log($conf, {message => "<< target_name: [".$target_name."]", 'line' => __LINE__, level => 2});
if ($node =~ /^$target_name/)
{
$conf->{cluster}{target_node} = $node;
to_log($conf, {message => "Found the pacemaker name of the target node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1});
}
elsif ($node =~ /^$short_host_name/)
{
# This is me. Am I in maintenance mode?
if (exists $hash_ref->{instance_attributes})
{
# We've got some data..
my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : "";
my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : "";
to_log($conf, {message => "node: [".$node."] instance attribyte name: [".$name."], value: [".$value."]", 'line' => __LINE__, level => 1});
if (($name eq "maintenance") and ($value eq "on"))
{
# We're in maintenance mode, abort.
to_log($conf, {message => "This node is in maintenance mode. Not able to fence!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
}
}
}
my $quorate = $body->{'have-quorum'};
to_log($conf, {message => "quorate: [".$quorate."]", 'line' => __LINE__, level => 1});
if (not $quorate)
{
to_log($conf, {message => "This not is not quorate. Refusing to fence the peer!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
# See if the target node is already out of the cluster.
foreach my $hash_ref (@{$body->{status}{node_state}})
{
my $node = $hash_ref->{uname};
my $join = $hash_ref->{'join'};
my $expected = $hash_ref->{expected};
to_log($conf, {message => "node: [".$node."] join: [".$join."], expected: [".$expected."]", 'line' => __LINE__, level => 3});
if ($node eq $conf->{cluster}{target_node})
{
to_log($conf, {message => "Checking the status of target node: [".$node."].", 'line' => __LINE__, level => 1});
if (($join eq "down") && ($expected eq "down"))
{
# The node is out.
to_log($conf, {message => "The node: [".$node."] is already down. No actual fence needed.", 'line' => __LINE__, level => 1});
exit(7);
}
else
{
to_log($conf, {message => "The node: [".$node."] is: [".$join."/".$expected."] (join/expected). Proceeding with the fence action.", 'line' => __LINE__, level => 1});
}
}
}
}
# Did I find the target?
if (not $conf->{cluster}{target_node})
{
to_log($conf, {message => "Failed to find the pacemaker name of the target node. Unable to proceed!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This reads in the CIB XML and returns it as a single multi-line string.
sub read_cib
{
my ($conf) = @_;
my $xml_opened = 0;
my $xml_closed = 0;
my $body = "";
my $cib = '<?xml version="1.0" encoding="UTF-8"?>';
my $shell_call = $conf->{path}{exe}{cibadmin}." --local --query";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 3});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 3});
$cib .= "\n".$line;
if ($line =~ /Signon to CIB failed/i)
{
# Failed to connect, we're probably not in the cluster.
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to get the CIB status.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
if ($line =~ /^<cib .*?>$/)
{
$xml_opened = 1;
to_log($conf, {message => "xml_opened: [".$xml_opened."].", 'line' => __LINE__, level => 3});
}
if ($line =~ /^<\/cib>$/)
{
$xml_closed = 1;
to_log($conf, {message => "xml_closed: [".$xml_closed."].", 'line' => __LINE__, level => 3});
}
}
close $file_handle;
to_log($conf, {message => "cib: ==========\n".$cib."\n==========", 'line' => __LINE__, level => 3});
# Now parse the CIB XML if I read it OK.
to_log($conf, {message => "xml_opened: [".$xml_opened."], xml_closed: [".$xml_closed."].", 'line' => __LINE__, level => 3});
if (($xml_opened) && ($xml_closed))
{
# We're good
local $@;
my $xml = XML::Simple->new();
my $test = eval { $body = $xml->XMLin($cib, KeyAttr => { language => 'name', key => 'name' }, ForceArray => [ 'id' ]) };
if (not $test)
{
chomp $@;
my $error = "[ Error ] - The was a problem parsing: [".$cib."]. The error was:\n";
$error .= "===========================================================\n";
$error .= $@."\n";
$error .= "===========================================================\n";
to_log($conf, {message => $error, 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
else
{
# Failed to read the CIB XML.
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to read the CIB XML properly.", 'line' => __LINE__, level => 2, priority => "err"});
exit(1);
}
return($body);
}
# This checks the given paths and, if something isn't found, it searches PATH trying to find it.
sub find_executables
{
my ($conf) = @_;
# Variables.
my $check = "";
my $bad = 0;
# Log entries can only happen if I've found 'logger', so an extra check will be made on 'to_log'
# calls.
my @dirs = split/:/, $ENV{PATH};
foreach my $exe (sort {$b cmp $a} keys %{$conf->{path}{exe}})
{
if ( not -e $conf->{path}{exe}{$exe} )
{
to_log($conf, {message => "The program: [".$exe."] is not at: [".$conf->{path}{exe}{$exe}."]. Looking for it now...", 'line' => __LINE__, level => 1});
foreach my $path (@dirs)
{
$check = "$path/$exe";
$check =~ s/\/\//\//g;
to_log($conf, {message => "Checking: [".$check."]", 'line' => __LINE__, level => 2});
if ( -e $check )
{
if (-e $conf->{path}{exe}{logger})
{
to_log($conf, {message => "Found it! Changed path for: [".$exe."] from: [".$conf->{path}{exe}{$exe}."] to: [".$check."]", 'line' => __LINE__, level => 1});
}
else
{
warn "DEBUG: Found it! Changed path for: [".$exe."] from: [".$conf->{path}{exe}{$exe}."] to: [".$check."]\n";
}
$conf->{path}{exe}{$exe} = $check;
}
else
{
to_log($conf, {message => "Not found.", 'line' => __LINE__, level => 2});
}
}
}
else
{
to_log($conf, {message => "Found!", 'line' => __LINE__, level => 3});
next;
}
# Make sure it exists now.
to_log($conf, {message => "Checking again if: [".$exe."] is at: [".$conf->{path}{exe}{$exe}."].", 'line' => __LINE__, level => 3});
if (not -e $conf->{path}{exe}{$exe})
{
$bad = 1;
if (-e $conf->{path}{exe}{logger})
{
to_log($conf, {message => "Failed to find executable: [".$exe."]. Unable to proceed.", 'line' => __LINE__, level => 0});
}
else
{
warn "Failed to find executable: [".$exe."]. Unable to proceed.\n";
}
}
}
if ($bad)
{
exit(1);
}
return(0);
}
# Log file entries
sub to_log
{
my ($conf, $parameters) = @_;
my $facility = defined $parameters->{facility} ? $parameters->{facility} : $conf->{'log'}{facility};
my $level = defined $parameters->{level} ? $parameters->{level} : 1;
my $line = defined $parameters->{'line'} ? $parameters->{'line'} : 0;
my $message = defined $parameters->{message} ? $parameters->{message} : "";
my $priority = defined $parameters->{priority} ? $parameters->{priority} : "";
# Leave if we don't care about this message
return if $level > $conf->{'log'}{level};
return if not $message;
# Build the message. We log the line
if (($conf->{'log'}{line_numbers}) && ($line))
{
$message = $line."; ".$message;
}
my $priority_string = $facility;
if ($priority)
{
$priority_string .= ".".$priority;
}
elsif ($level eq "0")
{
$priority_string .= ".notice";
}
elsif (($level eq "1") or ($level eq "2"))
{
$priority_string .= ".info";
}
else
{
$priority_string .= ".debug";
}
# Clean up the string for bash
$message =~ s/"/\\\"/gs;
#$message =~ s/\(/\\\(/gs;
my $shell_call = $conf->{path}{exe}{logger}." --priority ".$priority_string." --tag ".$conf->{'log'}{tag}." -- \"".$message."\"";
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)
{
# This should not generate output.
chomp;
my $line = $_;
print "Unexpected logging output: [".$line."]\n";
}
close $file_handle;
return(0);
}
Loading…
Cancel
Save