* Added Cluster->configure_logind() to ensure that nodes are configured to ignore ACPI power button events so that IPMI-based fences work immediately.

* Added call to Cluster->configure_logind() to anvil-join-anvil and anvil-version-changes.
* Updated fence_pacemaker to add '--reboot' to the 'stonith_admin' call to ensure DRBD-triggered fence requests reboot instead of just turning nodes off.
This commit addresses issue #279.

Signed-off-by: digimer <digimer@gravitar.alteeve.com>
main
digimer 2 years ago
parent c5fbf20615
commit a3988cc3e5
  1. 1
      Anvil/Tools.pm
  2. 105
      Anvil/Tools/Cluster.pm
  3. 4
      Anvil/Tools/Database.pm
  4. 3
      Anvil/Tools/Server.pm
  5. 2
      share/words.xml
  6. 2
      tools/anvil-daemon
  7. 4
      tools/anvil-join-anvil
  8. 4
      tools/anvil-version-changes
  9. 2
      tools/fence_pacemaker

@ -1056,6 +1056,7 @@ sub _set_paths
'httpd.conf' => "/etc/httpd/conf/httpd.conf",
'journald_anvil' => "/etc/systemd/journald.conf.d/anvil.conf",
'journald.conf' => "/etc/systemd/journald.conf",
'logind.conf' => "/etc/systemd/logind.conf",
'lvm.conf' => "/etc/lvm/lvm.conf",
'pg_hba.conf' => "/var/lib/pgsql/data/pg_hba.conf",
'postgresql.conf' => "/var/lib/pgsql/data/postgresql.conf",

@ -8,6 +8,7 @@ use warnings;
use Data::Dumper;
use Scalar::Util qw(weaken isweak);
use String::ShellQuote;
use Text::Diff;
use XML::LibXML;
use XML::Simple qw(:strict);
@ -21,6 +22,7 @@ my $THIS_FILE = "Cluster.pm";
# check_node_status
# check_server_constraints
# check_stonith_config
# configure_logind
# delete_server
# get_fence_methods
# get_anvil_name
@ -1626,6 +1628,109 @@ sub check_stonith_config
}
=head2 configure_logind

This configures logind to ensure it doesn't try to do a graceful shutdown when being fenced via acpid power-button events. It does so by adding C<< HandlePowerKey=ignore >> to C<< logind.conf >> and then restarting C<< systemd-logind.service >>.

See: https://access.redhat.com/solutions/1578823

This only acts when the host type is C<< node >>. If C<< logind.conf >> doesn't exist, can't be read, or already has C<< HandlePowerKey >> set (to any value), nothing is changed.

This method always returns C<< 0 >>.

Parameters;

=head3 debug (optional, default '3')

The log level used for this method's log entries, and passed on to the methods it calls.

=cut
sub configure_logind
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->configure_logind()" }});
	
	# Only run this on nodes.
	my $host_type = $anvil->Get->host_type({debug => $debug});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_type => $host_type }});
	if ($host_type ne "node")
	{
		return(0);
	}
	
	# Read in the file.
	my $logind_conf = $anvil->data->{path}{configs}{'logind.conf'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		'path::configs::logind.conf' => $logind_conf,
	}});
	if (not -e $logind_conf)
	{
		# The config file doesn't exist, so there is nothing for us to update.
		return(0);
	}
	
	my $old_body = $anvil->Storage->read_file({debug => $debug, file => $logind_conf});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_body => $old_body }});
	if ($old_body eq "!!error!!")
	{
		# The read failed, don't risk writing out a broken config.
		return(0);
	}
	
	# If we don't see 'HandlePowerKey=<something>', we need to add 'HandlePowerKey=ignore'.
	my $added    = 0;
	my $new_body = "";
	foreach my $line (split/\n/, $old_body)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }});
		
		# Whitespace around the '=' is tolerated so that an existing setting written as, for 
		# example, 'HandlePowerKey = ignore' is recognized instead of being duplicated.
		if ($line =~ /^\s*HandlePowerKey\s*=\s*(.*)$/)
		{
			# It's been set. No matter how it's set, we don't change it again.
			my $set_to = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { set_to => $set_to }});
			return(0);
		}
		
		$new_body .= $line."\n";
		if ((not $added) && ($line =~ /^#\s*HandlePowerKey\s*=/))
		{
			# Add our line under the (first) commented out one. The 'not $added' check 
			# prevents the key being inserted more than once if there are several commented 
			# out copies.
			$new_body .= "HandlePowerKey=ignore\n";
			$added    =  1;
		}
	}
	
	if (not $added)
	{
		# There was no commented-out entry to sit under, so append it.
		$new_body .= "HandlePowerKey=ignore\n";
		$added    =  1;
	}
	
	# If we're still here, the key was added. Record the difference in the logs before saving.
	my $difference = diff(\$old_body, \$new_body, { STYLE => 'Unified' });
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		added      => $added,
		difference => $difference, 
	}});
	
	# Write it out.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0732"});
	$anvil->Storage->write_file({
		file      => $logind_conf, 
		body      => $new_body, 
		backup    => 1, 
		overwrite => 1, 
	});
	sleep 1;
	
	# Restart the daemon so that the new setting is used.
	# NOTE(review): The key below is 'job_0733', not 'log_0733'. It matches the key name added to 
	#               'words.xml', so it is left as-is here; confirm the prefix is intended.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0733", variables => { daemon => "systemd-logind.service" }});
	$anvil->System->restart_daemon({
		debug  => $debug, 
		daemon => "systemd-logind.service",
	});
	
	return(0);
}
=head2 delete_server
This takes a server (resource) name and deletes it from pacemaker. If there is a problem, C<< !!error!! >> is returned. Otherwise, C<< 0 >> is returned either once the resource is deleted, or if the resource didn't exist in the first place.

@ -12199,9 +12199,9 @@ WHERE
{
my $difference = diff \$old_server_definition_xml, \$server_definition_xml, { STYLE => 'Unified' };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0556", variables => {
server_name => $server_name,
server_name => $server_name,
server_definition_server_uuid => $server_definition_server_uuid,
difference => $difference,
difference => $difference,
}});
}

@ -35,6 +35,9 @@ Example;
use warnings;
use Sys::Virt;
# https://metacpan.org/pod/Sys::Virt::Domain
# https://libvirt.org/api.html
my $uri = "qemu:///system";
my $connection = Sys::Virt->new(uri => $uri);
my @domains = $connection->list_domains();

@ -2316,6 +2316,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0729">The DRBD Proxy license file has expired.</key>
<key name="log_0730">None of the MAC sddresses in the The DRBD Proxy license file match any of the MAC addresses on this system.</key>
<key name="log_0731">The DRBD Proxy license file: [#!data!path::configs::drbd-proxy.license!#] is missing expected data or is malformed.</key>
<key name="log_0732">Updating logind to ignore ACPI power button events so that IPMI-based fence requests don't trigger an attempt to gracefully shut down. For more information, see: https://access.redhat.com/solutions/1578823</key>
<key name="job_0733">Restarting the daemon: [#!variable!daemon!#].</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -1295,7 +1295,7 @@ sub handle_special_cases
my ($anvil) = @_;
# This is now handled by 'anvil-version-changes'
my $shell_call = $anvil->data->{path}{exe}{'anvil-version-changes'};
my $shell_call = $anvil->data->{path}{exe}{'anvil-version-changes'}.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($states_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});

@ -1069,6 +1069,10 @@ sub configure_pacemaker
}
}
# Make sure logind is updated to handle fencing properly
# see - https://access.redhat.com/solutions/1578823
$anvil->Cluster->configure_logind({debug => 2});
# Enable fencing and set the retry to INFINITY, if needed.
$anvil->data->{cib}{parsed}{data}{stonith}{'max-attempts'} = "" if not defined $anvil->data->{cib}{parsed}{data}{stonith}{'max-attempts'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {

@ -104,6 +104,10 @@ sub node_checks
# Make sure DRBD compiled after a kernel upgrade.
$anvil->DRBD->_initialize_kmod({debug => 2});
# Make sure logind is updated to handle fencing properly
# see - https://access.redhat.com/solutions/1578823
$anvil->Cluster->configure_logind({debug => 2});
return(0);
}

@ -738,7 +738,7 @@ sub kill_target
my ($conf) = @_;
# Variables
my $shell_call = $conf->{path}{exe}{stonith_admin}." --fence ".$conf->{cluster}{target_node}." --verbose; RC=\$?; ".$conf->{path}{exe}{crm_error}." \$RC; ".$conf->{path}{exe}{echo}." rc:\$RC";
my $shell_call = $conf->{path}{exe}{stonith_admin}." --fence ".$conf->{cluster}{target_node}." --reboot --verbose; RC=\$?; ".$conf->{path}{exe}{crm_error}." \$RC; ".$conf->{path}{exe}{echo}." rc:\$RC";
to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
while(<$file_handle>)

Loading…
Cancel
Save