* Increased the scancore agent run timeout to 60 seconds.

* Updated anvil-safe-start to start DRBD resources when the peer's DRBD resource is 'Connecting'.
* Updated fence_pacemaker to more intelligently check the list of host names related to an IP address when looking for the peer host name.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent c2b57ca3c8
commit ad4a1ecc78
  1. 2
      Anvil/Tools.pm
  2. 4
      Anvil/Tools/Get.pm
  3. 4
      scancore-agents/scan-ipmitool/scan-ipmitool
  4. 2
      scancore-agents/scan-server/scan-server.xml
  5. 57
      tools/anvil-safe-start
  6. 32
      tools/fence_delay
  7. 52
      tools/fence_pacemaker

@ -834,7 +834,7 @@ sub _set_defaults
$anvil->data->{scancore} = {
timing => {
# Delay between DB connection attempts when no databases are available?
agent_runtime => 30,
agent_runtime => 60,
db_retry_interval => 2,
# Delay between scans?
run_interval => 30,

@ -780,11 +780,11 @@ sub bridges
if (not $test)
{
# JSON parse failed.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "error_0140", variables => {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "error_0140", variables => {
json => $output,
error => $@,
}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0519"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0519"});
# NOTE: This is not designed to be normally used. It was created as a stop-gap while waiting
# for resolution on: https://bugzilla.redhat.com/show_bug.cgi?id=1868467

@ -182,7 +182,7 @@ $anvil->data->{'scan-ipmitool'} = {
},
# On Dells, 'Temp (xxh)' change a lot, so we bump the jump.
'Temp' => {
jump => 20,
jump => 30,
},
},
# TODO: Remove this and have Striker pull the list of thermal sensors read in the
@ -235,7 +235,7 @@ if ($anvil->data->{switches}{purge})
if (not find_ipmi_targets($anvil))
{
# No targets found.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_ipmitool_message_0001"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_message_0001"});
$anvil->nice_exit({exit_code => 1});
}

@ -63,7 +63,7 @@ The definition for the server: [#!variable!server!#] was edited outside of the A
======================
</key>
<key name="scan_server_alert_0004">The name of the server: [#!variable!old_name!#] has changed to: [#!variable!new_name!#]</key>
<key name="scan_server_alert_0005">The server: [#!variable!name!#] state has changed from: [#!variable!old_state!#] to: [#!variable!new_state!#].</key>
<key name="scan_server_alert_0005">The server: [#!variable!server!#] state has changed from: [#!variable!old_state!#] to: [#!variable!new_state!#].</key>
<key name="scan_server_alert_0006">
A new server named: [#!variable!server!#] has been found. The definition XML is:
====

@ -126,7 +126,59 @@ sub check_drbd
{
# Looks at the DRBD status reported by the peer and, for each resource/connection where the peer is
# 'Connecting' and the local side is not 'StandAlone', asks DRBD to bring the resource 'up' locally.
# 
# Parameters;
#  - $anvil: The Anvil::Tools handle. 'sys::peer_host_uuid', 'sys::peer_password' and 
#            'sys::peer_target_ip' are read from its data hash.
# 
# Returns '0' in all cases.
my ($anvil) = @_;
# Find the servers running on the peer.
my $short_host_name = $anvil->Get->short_host_name();
my $peer_host_uuid = $anvil->data->{sys}{peer_host_uuid};
my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
my $peer_password = $anvil->data->{sys}{peer_password};
my $peer_ip_address = $anvil->data->{sys}{peer_target_ip};
# The password is passed through 'is_secure()' before being handed to the logger.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
short_host_name => $short_host_name,
peer_host_uuid => $peer_host_uuid,
peer_short_host_name => $peer_short_host_name,
peer_password => $anvil->Log->is_secure($peer_password),
peer_ip_address => $peer_ip_address,
}});
# Get the list of resources up on the peer.
# The first call has no target (presumably this reads the local status; confirm against
# DRBD->get_status). The second call passes the peer's IP and password as the target.
$anvil->DRBD->get_status({debug => 2});
$anvil->DRBD->get_status({
debug => 2,
password => $peer_password,
target => $peer_ip_address,
});
# Walk every resource, and every connection under it, found in the status keyed by the peer's IP.
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}})
{
foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}})
{
# NOTE(review): '$peer_is_me' is set here but is not read again in this sub, so connections to
#               hosts other than this one are not filtered out. Confirm whether a 
#               'next if not $peer_is_me;' was intended.
my $peer_is_me = $anvil->Network->is_local({host => $peer_name});
# NOTE(review): This repeats the peer values logged above; '$peer_name' and '$peer_is_me' are not
#               logged. Confirm if that was the intent.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peer_host_uuid => $peer_host_uuid,
peer_short_host_name => $peer_short_host_name,
peer_password => $anvil->Log->is_secure($peer_password),
peer_ip_address => $peer_ip_address,
}});
# The peer's view of this connection comes from the status keyed by the peer's IP. The local view is
# looked up under this host's short host name using the same connection name, and falls back to an
# empty string when that key doesn't exist.
# NOTE(review): Assumes the local status is stored under the short host name and that the local
#               connection uses the same name as the peer's connection; verify against 
#               DRBD->get_status.
my $peer_connection_state = $anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}{$peer_name}{'connection-state'};
my $local_connection_state = exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peer_connection_state => $peer_connection_state,
local_connection_state => $local_connection_state,
}});
# Only act when the peer is 'Connecting' and we are not 'StandAlone'. An empty local state (no entry
# found) also passes the second test.
if (($peer_connection_state =~ /Connecting/i) && ($local_connection_state !~ /StandAlone/i))
{
# Start the DRBD resource locally.
my $return_code = $anvil->DRBD->manage_resource({
debug => 2,
resource => $resource,
task => "up",
});
# The return code is only logged, it is not otherwise acted on.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
}
}
}
return(0);
}
@ -415,6 +467,11 @@ sub wait_for_access
network => $this_network,
peer_ip => $peer_ip_address,
}});
$anvil->data->{sys}{peer_target_ip} = $peer_ip_address;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sys::peer_target_ip" => $anvil->data->{sys}{peer_target_ip},
}});
}
else
{

@ -38,7 +38,7 @@ my $conf = {
action => "off",
agent_version => "1.0",
log_level => 1,
wait => 60,
'wait' => 60,
device => "",
list => "",
'log' => "/var/log/fence_delay.log",
@ -55,7 +55,7 @@ my $conf = {
# Log file for output.
my $log = IO::Handle->new();
open ($log, ">>".$conf->{'system'}{'log'}) || die "Failed to open: [".$conf->{'system'}{'log'}."] for writing; Error: $!\n";
open ($log, ">>".$conf->{'system'}{'log'}) || warn "Failed to open: [".$conf->{'system'}{'log'}."] for writing; Error: $!\n";
# Set $log and STDOUT to hot (unbuffered) output.
if (1)
{
@ -74,7 +74,7 @@ read_cla($conf, $log);
read_stdin($conf, $log);
# If I've been asked to show the metadata XML, do so and then exit.
if ($conf->{'system'}{action} eq "metadata")
if (($conf->{'system'}{action} eq "metadata") or ($conf->{'system'}{action} eq "meta-data"))
{
metadata($conf, $log);
do_exit($conf, $log, 0);
@ -274,36 +274,36 @@ sub read_cla
{
# Print the version information and then exit.
$conf->{'system'}{version} = 1;
record($conf, $log, "[ Debug ] - 'system::version': [".$conf->{'system'}{version}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::version': . [".$conf->{'system'}{version}."]\n", 1);
}
elsif (($arg eq "-q") or ($arg eq "--quiet"))
{
# Suppress all messages, including critical messages, from STDOUT.
$conf->{'system'}{log_level} = 0;
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{quiet}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{quiet}."]\n", 1);
}
elsif ($arg eq "--debug")
{
# Enable debug mode.
$conf->{'system'}{log_level} = 2;
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{log_level}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{log_level}."]\n", 1);
}
elsif (($arg eq "-w") or ($arg eq "--wait"))
{
# How long to wait before exiting.
$set_next = "wait";
record($conf, $log, "[ Debug ] - 'set_next': [".$set_next."]\n", 2);
record($conf, $log, "[ Debug ] - 'set_next': ........ [".$set_next."]\n", 1);
}
elsif (($arg eq "-o") or ($arg eq "--action"))
{
# This is the action to take.
$set_next = "action";
record($conf, $log, "[ Debug ] - 'set_next': [".$set_next."]\n", 2);
record($conf, $log, "[ Debug ] - 'set_next': ........ [".$set_next."]\n", 1);
}
else
{
# Bad argument.
record($conf, $log, "[ Warning ] - Argument: [".$arg."] is not valid arguments.\n", 2);
record($conf, $log, "[ Warning ] - Argument: [".$arg."] is not valid arguments.\n", 1);
}
}
@ -344,7 +344,7 @@ sub read_stdin
my ($name, $value) = split /\s*=\s*/, $option;
# Record the line for now, but comment this out before release.
record ($conf, $log, "Name: [$name], value: [$value].\n");
record ($conf, $log, "Name: [$name], value: [$value].\n", 2);
# Set my variables depending on the variable name.
if ($name eq "agent")
@ -352,27 +352,27 @@ sub read_stdin
# This is only used by 'fenced', but I record it for
# potential debugging.
$conf->{'system'}{agent} = $value;
record($conf, $log, "[ Debug ] - 'system::agent': [".$conf->{'system'}{agent}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::agent': ... [".$conf->{'system'}{agent}."]\n", 1);
}
elsif ($name eq "action")
{
$conf->{'system'}{action} = $value;
record($conf, $log, "[ Debug ] - 'system::action': [".$conf->{'system'}{action}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::action': .. [".$conf->{'system'}{action}."]\n", 1);
}
elsif ($name eq "quiet")
{
$conf->{'system'}{log_level} = 0;
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{log_level}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{log_level}."]\n", 1);
}
elsif ($name eq "debug")
{
$conf->{'system'}{log_level} = 2;
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{log_level}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::log_level': [".$conf->{'system'}{log_level}."]\n", 1);
}
elsif ($name eq "wait")
{
$conf->{'system'}{wait} = $value;
record($conf, $log, "[ Debug ] - 'system::wait': [".$conf->{'system'}{wait}."]\n", 2);
record($conf, $log, "[ Debug ] - 'system::wait': .... [".$conf->{'system'}{'wait'}."]\n", 1);
}
else
{
@ -392,7 +392,7 @@ sub record
return if $level > $conf->{'system'}{log_level};
# Print to the log
print $log $msg;
print $log get_date_time($conf)." - ".$msg;
# Print to the screen if we're not 'quiet'.
print $msg if not $conf->{'system'}{quiet};

@ -249,7 +249,7 @@ sub get_drbd_status
return(0);
}
# This identifies the pacemaker name of the target node. If it can't find
# This identifies the pacemaker name of the target node. If it can't find the peer, it exits with '1'.
sub identify_peer
{
my ($conf) = @_;
@ -270,13 +270,15 @@ sub identify_peer
to_log($conf, {message => "Output: [$line]", 'line' => __LINE__, level => 2});
if ($line =~ /^$target_ip\s+(.*)$/)
{
# This could be multiple names.
$target_host = $1;
to_log($conf, {message => ">> target_host: [$target_host]", 'line' => __LINE__, level => 2});
to_log($conf, {message => "target_host: [$target_host]", 'line' => __LINE__, level => 2});
#to_log($conf, {message => ">> target_host: [$target_host]", 'line' => __LINE__, level => 2});
# Strip off any suffix, we only want the short name.
$target_host =~ s/\..*//;
to_log($conf, {message => "<< target_host: [$target_host]", 'line' => __LINE__, level => 2});
last;
#$target_host =~ s/\..*//;
#to_log($conf, {message => "<< target_host: [$target_host]", 'line' => __LINE__, level => 2});
#last;
}
}
close $file_handle;
@ -294,30 +296,38 @@ sub identify_peer
my $host_name = $ENV{HOSTNAME};
my $short_host_name = $ENV{HOSTNAME};
$short_host_name =~ s/\..*$//;
to_log($conf, {message => "host_name: [$host_name], short_host_name: [".$short_host_name."]", 'line' => __LINE__, level => 2});
foreach my $hash_ref (sort {$a cmp $b} @{$body->{configuration}{nodes}{node}})
{
my $node = $hash_ref->{uname};
my $id = $hash_ref->{id};
if ($node =~ /^$target_host/)
to_log($conf, {message => "node: [$node], id: [$id]", 'line' => __LINE__, level => 2});
foreach my $target_name (split/ /, $target_host)
{
$conf->{cluster}{target_node} = $node;
to_log($conf, {message => "Found the pacemaker name of the target node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1});
}
elsif ($node =~ /^$short_host_name/)
{
# THis is me. Am I in maintenance mode?
if (exists $hash_ref->{instance_attributes})
to_log($conf, {message => ">> target_name: [$target_name]", 'line' => __LINE__, level => 2});
$target_name =~ s/\..*//;
to_log($conf, {message => "<< target_name: [$target_name]", 'line' => __LINE__, level => 2});
if ($node =~ /^$target_name/)
{
$conf->{cluster}{target_node} = $node;
to_log($conf, {message => "Found the pacemaker name of the target node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 1});
}
elsif ($node =~ /^$short_host_name/)
{
# We've got some data...
my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : "";
my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : "";
to_log($conf, {message => "node: [$node] instance attribyte name: [$name], value: [$value]", 'line' => __LINE__, level => 1});
if (($name eq "maintenance") and ($value eq "on"))
# This is me. Am I in maintenance mode?
if (exists $hash_ref->{instance_attributes})
{
# We're in maintenance mode, abort.
to_log($conf, {message => "This node is in maintenance mode. Not able to fence!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
# We've got some data...
my $name = defined $hash_ref->{instance_attributes}{nvpair}{name} ? $hash_ref->{instance_attributes}{nvpair}{name} : "";
my $value = defined $hash_ref->{instance_attributes}{nvpair}{value} ? $hash_ref->{instance_attributes}{nvpair}{value} : "";
to_log($conf, {message => "node: [$node] instance attribyte name: [$name], value: [$value]", 'line' => __LINE__, level => 1});
if (($name eq "maintenance") and ($value eq "on"))
{
# We're in maintenance mode, abort.
to_log($conf, {message => "This node is in maintenance mode. Not able to fence!", 'line' => __LINE__, level => 0, priority => "err"});
exit(1);
}
}
}
}

Loading…
Cancel
Save