# 3: peer is already outdated or worse (e.g. inconsistent)
# 4: peer has been successfully fenced
# 5: peer not reachable, assumed to be dead
# 6: please outdate yourself, peer is known (or likely)
# to have better data, or is even currently primary.
# (actually, currently it is "peer is active primary now", but I'd like to
# change that meaning slightly towards the above meaning)
# 7: peer has been STONITHed, thus assumed to be properly fenced
# XXX IMO, this should rather be handled like 5, not 4.
# =========
#
# This program uses the following exit codes:
# - 1 = Something failed
# - 7 = Fence succeeded
# - 255 = End of program hit... should never happen.
#
# TODO:
# - Read the CIB; 'pcs status xml' or '/usr/sbin/cibadmin --local --query' ?
# -- Map the peer's name in pacemaker.
# -- Verify that stonith is enabled:
# -- Verify that the node is not in maintenance mode:
# -- Verify that we're quorate (a-la pacemaker):
# - Verify that the resource is 'resource-and-stonith'
# - Verify that the resource is 'UpToDate' (if not, should we suicide to give the other node priority, regardless of fence delay? what if both nodes have resources that are not UpToDate?)
# -
### NOTE: This doesn't use Anvil::Tools on purpose. We want to be quick and depend on as few things as
to_log($conf, {message => "Target's IP not set via the DRBD_PEER_ADDRESS environment variable. Unable to proceed.", 'line' => __LINE__, level => 0, priority => "err"});
# If we're not all UpToDate, but the peer is, abort
if ((not $local_all_uptodate) && ($peer_all_uptodate))
{
# We're not good
to_log($conf, {message => "This node has DRBD resources that are not UpToDate, but the peer is fully UpToDate. Aborting.", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
return(0);
}
# This identifies the pacemaker name of the target node. If it can't find
sub identify_peer
{
my ($conf) = @_;
# I know the target's (SN) IP, map it to a node.
my $target_host = "";
my $target_ip = $conf->{environment}{DRBD_PEER_ADDRESS};
# First, can we translate the IP to a hostname?
my $shell_call = $conf->{path}{exe}{getent}." hosts ".$target_ip;
# Failed to connect, we're probably not in the cluster.
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to get the CIB status.", 'line' => __LINE__, level => 0, priority => "err"});
to_log($conf, {message => "This not is not quorate. Refusing to fence the peer!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
}
else
{
# Failed to read the CIB XML.
to_log($conf, {message => "This node does not appear to be in the cluster. Unable to read the CIB XML properly.", 'line' => __LINE__, level => 2, priority => "err"});
exit(1);
}
}
# Did I find the target?
if (not $conf->{cluster}{target_node})
{
to_log($conf, {message => "Failed to find the pacemaker name of the target node. Unable to proceed!", 'line' => __LINE__, level => 0, priority => "err"});
to_log(LOG_DEBUG(), $conf, __LINE__, "Target node: [$remote_node] is *not* a cluster member (state: [$conf->{nodes}{$remote_node}{member}]). Not ejecting.");
return 0;
}
}
# Log file entries
# Sends one message to the system log by running the configured 'logger' program.
#
# Parameters:
#   $conf       - Program configuration hash reference. Reads {'log'}{facility}, {'log'}{level},
#                 {'log'}{line_numbers}, {'log'}{tag} and {path}{exe}{logger}.
#   $parameters - Hash reference with the optional keys:
#       facility - syslog facility; defaults to $conf->{'log'}{facility}.
#       level    - verbosity of this message (default 1). The message is dropped when this is
#                  greater than $conf->{'log'}{level}.
#       line     - source line number (default 0); prefixed to the message when
#                  $conf->{'log'}{line_numbers} is set and the line is non-zero.
#       message  - text to log; a false (empty) message is silently ignored.
#       priority - syslog priority name; when empty it is derived from 'level'
#                  (0 = notice, 1 or 2 = info, anything else = debug).
#
# Returns 0 once the logger has run, or nothing if the message was filtered out. Dies if the child
# process can not be created.
sub to_log
{
	my ($conf, $parameters) = @_;
	my $facility = defined $parameters->{facility} ? $parameters->{facility} : $conf->{'log'}{facility};
	my $level    = defined $parameters->{level}    ? $parameters->{level}    : 1;
	my $line     = defined $parameters->{'line'}   ? $parameters->{'line'}   : 0;
	my $message  = defined $parameters->{message}  ? $parameters->{message}  : "";
	my $priority = defined $parameters->{priority} ? $parameters->{priority} : "";
	
	# Leave if we don't care about this message
	return if $level > $conf->{'log'}{level};
	return if not $message;
	
	# Build the message. We log the line
	if (($conf->{'log'}{line_numbers}) && ($line))
	{
		$message = $line."; ".$message;
	}
	
	my $priority_string = $facility;
	if ($priority)
	{
		$priority_string .= ".".$priority;
	}
	elsif ($level eq "0")
	{
		$priority_string .= ".notice";
	}
	elsif (($level eq "1") or ($level eq "2"))
	{
		$priority_string .= ".info";
	}
	else
	{
		$priority_string .= ".debug";
	}
	
	# The message is handed to 'logger' as a single argument, without going through a shell. This way,
	# quotes, brackets, '$' and backticks in the message are logged verbatim instead of being
	# interpreted (or executed) by the shell.
	my @command = ($conf->{path}{exe}{logger}, "--priority", $priority_string, "--tag", $conf->{'log'}{tag}, "--", $message);
	
	# Fork with the child's STDOUT connected to our read handle.
	my $pid = open(my $file_handle, "-|");
	if (not defined $pid)
	{
		die "Failed to call: [".join(" ", @command)."]. The error was: $!\n";
	}
	if (not $pid)
	{
		# We're the child. Send STDERR down the same pipe so the parent sees any complaint from
		# the logger, then become the logger.
		require POSIX;
		open(STDERR, ">&", \*STDOUT);
		{
			no warnings 'exec';
			exec { $command[0] } @command or syswrite(STDOUT, "Failed to call: [".$command[0]."]. The error was: $!\n");
		}
		
		# Only reached if exec failed. Use _exit so the parent's END blocks and buffered output are
		# not run or flushed a second time by this child.
		POSIX::_exit(127);
	}
	
	while (my $output = <$file_handle>)
	{
		# This should not generate output.
		chomp $output;
		print "Unexpected logging output: [".$output."]\n";
	}
	close $file_handle;
	
	return(0);
}
# Runs 'fence_tool -n ls' and collects the fence daemon's status fields from its output.
#
# Parameters:
#   $conf - Program configuration hash reference; only used here to log a failure to run the tool.
#
# Returns a hash reference that may hold the keys 'member_count', 'victim_count', 'victim_now',
# 'master_nodeid' and 'wait_state'. A key is absent when the matching line was not seen in the output,
# and the hash is empty when the tool could not be run.
sub get_fenced_state
{
	my ($conf) = @_;
	
	my %fence_state;
	
	# Field name and the pattern that extracts its value. Lines are matched by content, not by
	# position, so an extra, missing or re-ordered line does not shift every value after it.
	my @wanted = (
		[member_count  => qr/member count\s+([0-9]+)/],
		[victim_count  => qr/victim count\s+([0-9]+)/],
		[victim_now    => qr/victim now\s+([0-9]+)/],
		[master_nodeid => qr/master nodeid\s+([0-9]+)/],
		[wait_state    => qr/wait state\s+(\w+)$/],
	);
	
	# List-form pipe open; the command is run directly, without a shell.
	my $tool_handle;
	if (not open($tool_handle, "-|", "fence_tool", "-n", "ls"))
	{
		to_log($conf, {message => "Failed to call: fence_tools error was $!\n", 'line' => __LINE__, level => 0, priority => "err"});
		
		# There is nothing to read, hand back the empty state.
		return \%fence_state;
	}
	
	while (my $line = <$tool_handle>)
	{
		foreach my $field (@wanted)
		{
			my ($key, $pattern) = @{$field};
			
			# The first occurrence of a field wins.
			next if exists $fence_state{$key};
			if ($line =~ $pattern)
			{
				$fence_state{$key} = $1;
				last;
			}
		}
	}
	close($tool_handle);
	
	return \%fence_state;
}
# Polls the fence daemon, about once per second, until its 'wait state' equals the requested state or
# the allowed number of polls is used up.
#
# Parameters:
#   $conf         - Program configuration hash reference, passed on to get_fenced_state() and to_log().
#   $target_state - The 'wait state' string to wait for.
#   $time_seconds - How many times to poll, one second apart. Missing, fractional or negative values
#                   are truncated to a whole number and at least one poll is always made.
#
# Returns the hash reference from the last get_fenced_state() call. This is returned whether or not
# the target state was reached, so the caller must check 'wait_state' itself.
sub wait_for_fenced_status
{
	my ($conf, $target_state, $time_seconds) = @_;
	
	# A plain truth test on the counter never ends for negative or fractional values, and returns
	# nothing at all for zero. Normalise it to a count of at least one poll.
	my $polls_left = defined $time_seconds ? int($time_seconds) : 0;
	   $polls_left = 1 if $polls_left < 1;
	
	my $fenced_state;
	while (1)
	{
		$fenced_state = get_fenced_state($conf);
		
		# The state is missing when the tool failed or printed something unexpected.
		my $wait_state = defined $fenced_state->{wait_state} ? $fenced_state->{wait_state} : "";
		if ($wait_state eq $target_state)
		{
			return $fenced_state;
		}
		
		# Don't sleep after the final poll; the state we return is then the one we just read.
		$polls_left--;
		last if $polls_left < 1;
		
		sleep(1);
		to_log($conf, {message => "DEBUG: Waiting for $target_state. Now $wait_state\n", 'line' => __LINE__, level => 3, priority => "debug"});
	}
	
	return $fenced_state;
}
sub eventually_wait_for_fenced
{
my ($conf) = @_;
my $fenced_state1 = wait_for_fenced_status($conf, "fencing", 30);
if ($fenced_state1->{wait_state} ne "fencing")
{
to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Expected fencd to do a fence action, got $fenced_state1->{wait_state}\n");
return;
}
my $to_fence_node_id = $conf->{nodes}{$conf->{environment}{DRBD_PEERS}}{id};
if ($fenced_state1->{victim_now} != $to_fence_node_id)
{
to_log(LOG_ERR(), $conf, __LINE__, "Fenced is shooting at $fenced_state1->{victim_now}; Should shoot at $to_fence_node_id\n");
}
my $fenced_state2 = wait_for_fenced_status($conf, "none", 240);
if ($fenced_state2->{wait_state} eq "none")
{
to_log(LOG_INFO(), $conf, __LINE__, "Seems fenced was successfull\n");