Local modifications to ClusterLabs/Anvil by Alteeve
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

304 lines
10 KiB

#!/usr/bin/perl
#
# This manages power on the host. It can set that a reboot is or is no longer required. It can also reboot or
# power off the machine.
#
# Examples;
# - Mark that a reboot is required - anvil-manage-power --reboot-needed 1
# - Clear that a reboot is needed - anvil-manage-power --reboot-needed 0
# - Report whether a reboot is needed or not - anvil-manage-power
# - Reboot the system - anvil-manage-power --reboot [-y]
# - Power the system off - anvil-manage-power --poweroff [-y]
#
# Exit codes;
# 0 = Normal exit.
# 1 = No database connections available.
#
# TODO: Don't reboot or power off until all external users are done with the database on this system (if
# applicable)
#
use strict;
use warnings;
use Anvil::Tools;
# Disable buffering
$| = 1;

# Work out this program's name and the directory it was called from. When '$0' has no directory part (for 
# example, when invoked as 'perl anvil-manage-power'), the regular expressions don't match; fall back to 
# '$0' itself and to '.' so that neither value is left undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE = $0 if not defined $THIS_FILE;
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

my $anvil = Anvil::Tools->new();

# Read the switches this program understands.
$anvil->Get->switches({list => [
	"no-wait", 
	"power-off", 
	"poweroff", 
	"reboot", 
	"reboot-needed"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Treat '--power-off' as an alias for '--poweroff', and '--yes' as an alias for '-y'.
if ($anvil->data->{switches}{'power-off'})
{
	$anvil->data->{switches}{'poweroff'} = 1;
}
if ($anvil->data->{switches}{'yes'})
{
	$anvil->data->{switches}{'y'} = 1;
}

# Connect to DBs.
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, exit.
	print $anvil->Words->string({key => "error_0003"})."\n";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0003"});
	$anvil->nice_exit({exit_code => 1});
}

# Clear the job in case a previous call failed.
$anvil->Job->clear({debug => 2, job_uuid => $anvil->data->{switches}{'job-uuid'}}) if $anvil->data->{switches}{'job-uuid'};

# Are we being asked to reboot or power off? Each request is a [switch / task name, confirmation prompt key]
# pair. Reboot is checked first, so it is the one acted on if both switches were given.
foreach my $request (["reboot", "message_0059"], ["poweroff", "message_0060"])
{
	my ($task, $prompt_key) = @{$request};
	next if not $anvil->data->{switches}{$task};
	
	# Did the user confirm on the command line?
	my $confirmed = $anvil->data->{switches}{'y'} ? 1 : 0;
	if (not $confirmed)
	{
		# Not yet, ask to confirm.
		print $anvil->Words->string({key => $prompt_key})." ";
		my $answer = <STDIN>;
		
		# At end-of-file (STDIN closed or redirected from nothing), the read returns undef. Treat that
		# as "no answer" instead of passing undef to chomp and the regex.
		$answer = "" if not defined $answer;
		chomp($answer);
		$confirmed = 1 if $answer =~ /^y/i;
	}
	
	if ($confirmed)
	{
		do_poweroff($anvil, $task);
	}
	else
	{
		# Abort and exit.
		print $anvil->Words->string({key => "message_0061"})."\n";
		$anvil->nice_exit({exit_code => 0});
	}
}

# Declared at file scope, so it is visible to the subroutines defined below.
my $reboot_needed = $anvil->System->reboot_needed({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::reboot-needed" => $anvil->data->{switches}{'reboot-needed'} }});

# Use an empty string when the switch is not defined, so the string comparisons below don't warn.
my $reboot_needed_switch = defined $anvil->data->{switches}{'reboot-needed'} ? $anvil->data->{switches}{'reboot-needed'} : "";
if ($reboot_needed_switch eq "1")
{
	# Enable
	if (not $reboot_needed)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0688!#" }});
		$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 1});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
		print $anvil->Words->string({key => "message_0048"})."\n";
	}
	else
	{
		# Was already set, do nothing
		print $anvil->Words->string({key => "message_0049"})."\n";
	}
}
elsif ($reboot_needed_switch eq "0")
{
	# Disabled
	if ($reboot_needed)
	{
		$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
		print $anvil->Words->string({key => "message_0050"})."\n";
	}
	else
	{
		# Was already disabled, do nothing
		print $anvil->Words->string({key => "message_0051"})."\n";
	}
}
elsif ($reboot_needed_switch)
{
	# Bad call; the switch was given with a value other than '1' or '0'.
	print $anvil->Words->string({key => "message_0052", variables => { program => $THIS_FILE }})."\n";
}

# Get the current state
if ($reboot_needed)
{
	# Report that we need to reboot
	print $anvil->Words->string({key => "message_0053"})."\n";
}
else
{
	# Report that we're not.
	print $anvil->Words->string({key => "message_0054"})."\n";
}

# We're done
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Private functions. #
#############################################################################################################
# This does a reboot or power off.
# 
# Parameters;
# - anvil = The Anvil::Tools handle.
# - task  = Either "reboot" or "poweroff". This is appended to the 'systemctl' call as-is, so callers must 
#           only pass one of those two values.
# 
# Before calling 'systemctl', this sets the 'reboot needed' flag, records the host's new status in the 
# database, waits for any running 'anvil-version-changes' instances to finish and, if '--job-uuid' was 
# given, marks that job as complete. It finishes by calling 'nice_exit' with exit code '0'.
sub do_poweroff
{
	my ($anvil, $task) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { task => $task }});
	
	# In case we're being called by another job, we'll sleep for a few second to let those close out.
	sleep 3;
	
	# We'll wait until the system has at least five minutes of uptime, unless '--no-wait' was given. An 
	# uptime of '0' disables the wait below.
	my $uptime = $anvil->data->{switches}{'no-wait'} ? 0 : $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"switches::no-wait" => $anvil->data->{switches}{'no-wait'},
		uptime              => $uptime, 
	}});
	
	my $say_task = $task eq "poweroff" ? "message_0062" : "message_0063";
	print $anvil->Words->string({key => $say_task})."\n";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => $say_task});
	
	# To minimize the trouble of a problem where the reboot needed flag isn't cleared, and so the system 
	# wants to repeatedly reboot, we need to add a delay to not let anvil-daemon ask us to 
	# reboot/power-off until the system uptime is more than five minutes (300 seconds).
	if (($uptime) && ($uptime < 300))
	{
		# We'll wait until the system has been running for five minutes.
		my $difference = 300 - $uptime;
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0224", variables => {
			task       => $task eq "poweroff" ? "#!string!log_0225!#" : "#!string!log_0226!#",
			difference => $difference,
			uptime     => $uptime,
			say_time   => $anvil->Get->date_and_time({offset => $difference, time_only => 1}),
		}});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => $say_task});
		
		sleep $difference;
		
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0227", variables => {
			task => $task eq "poweroff" ? "#!string!log_0225!#" : "#!string!log_0226!#",
		}});
	}
	
	# If we were given a job UUID, use it to report progress later.
	my $job_uuid = "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }});
	if ($anvil->data->{switches}{'job-uuid'})
	{
		$job_uuid = $anvil->data->{switches}{'job-uuid'};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
	}
	
	# Make sure the 'reboot needed' flag is set. When 'anvil-daemon' starts, it will use this to confirm
	# that it is starting post-reboot and clear it. The result is only logged, so it is kept in a local 
	# variable rather than written to a file-scoped one.
	my $say_reason = $task eq "poweroff" ? "log_0689" : "log_0688";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!".$say_reason."!#" }});
	my $reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 1});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
	
	# Mark our power state; 'stopping' when powering off, 'rebooting' when rebooting.
	$anvil->Database->update_host_status({
		debug       => 2,
		host_uuid   => $anvil->Get->host_uuid,
		host_status => $task eq "poweroff" ? "stopping" : "rebooting",
	});
	
	# Wait if anvil-version-change is running. We check every ten seconds, and log (and print) the PIDs 
	# we're waiting on at most once per minute.
	my $next_log = time - 1;
	my $waiting  = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		next_log => $next_log, 
		waiting  => $waiting, 
	}});
	while ($waiting)
	{
		my $pids          = $anvil->System->pids({program_name => $anvil->data->{path}{exe}{'anvil-version-changes'}});
		my $avc_instances = scalar(@{$pids});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { avc_instances => $avc_instances }});
		
		if ($avc_instances)
		{
			if (time > $next_log)
			{
				my $say_pids = join(", ", @{$pids});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0325", variables => { pids => $say_pids }});
				
				$next_log = time + 60;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
			}
			sleep 10;
		}
		else
		{
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
	}
	
	# If we have a job UUID, mark that we're done.
	if ($job_uuid)
	{
		$anvil->Job->update_progress({
			debug    => 2, 
			progress => 100,
			message  => $say_task,
			job_uuid => $job_uuid,
		});
	}
	
	# Now do the deed.
	my $shell_call = $anvil->data->{path}{exe}{systemctl}." ".$task;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	
	# Unlikely we're still alive, but 'poweroff' and 'reboot' do return once enqueued, so...
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code }});
	$anvil->nice_exit({exit_code => 0});
}