Local modifications to ClusterLabs/Anvil by Alteeve
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

522 lines
20 KiB

#!/usr/bin/perl
#
# This updates the host system.
# NOTE: This doesn't update the Anvil! software stack yet, just the OS.
#
# - On Striker; This will take the system offline and then run 'dnf -y update'.
# - On Nodes; This will do nothing until all servers are off the node. Then the node will be withdrawn,
# updated and then rejoin the cluster.
# - On DR; This will do nothing until no servers are running, then it will update the system.
#
# In all cases, the system will be rebooted if the kernel is updated.
#
# Exit codes;
# 0 = Normal exit.
# 1 = No database connections available.
# 2 = The job UUID was passed, but it wasn't valid.
# 3 = It looks like the update failed, reset progress to '0'.
# 4 = Failed to withdraw the node from the cluster.
#
# TODO:
# - Rebuild this to be 'striker-update-system' and have it update local strikers and all nodes.
#
use strict;
use warnings;
use Anvil::Tools;
# Disable buffering
$| = 1;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
my $anvil = Anvil::Tools->new();
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"clear-cache",
"no-reboot",
"reboot"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Log that we've started.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Connect to DBs.
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"});
$anvil->nice_exit({exit_code => 1});
}
if ($anvil->data->{switches}{'job-uuid'})
{
# Load the job details. If anything is returned, there was a problem.
my $return = $anvil->Job->get_job_details({job_uuid => $anvil->data->{switches}{'job-uuid'}});
if ($return)
{
# It's not a UUID.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 'return' => $return }});
$anvil->nice_exit({exit_code => 2});
}
}
# Clea any old runs.
update_progress($anvil, 0, "clear");
# We'll keep a count of lines and packages to show the user.
$anvil->data->{counts}{downloaded} = 0;
$anvil->data->{counts}{installed} = 0;
$anvil->data->{counts}{verified} = 0;
$anvil->data->{counts}{lines} = 0;
# Mark that we're starting
update_progress($anvil, 1, "message_0058,!!downloaded!0!!,!!installed!0!!,!!verified!0!!,!!lines!0!!");
update_progress($anvil, 2, "message_0033");
# Make sure maintenance mode is enabled.
$anvil->System->maintenance_mode({set => 1});
# Run the update
run_os_update($anvil, 1, 3);
# We're done updating
my $reboot_needed = $anvil->System->reboot_needed({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
if ($reboot_needed)
{
if (not $anvil->data->{switches}{'no-reboot'})
{
# Clear maintenance mode.
$anvil->System->maintenance_mode({set => 0});
# Record that we're rebooting so that 'striker-update-cluster' knows to wait for a reboot.
if ($anvil->data->{switches}{'job-uuid'})
{
my $query = "
UPDATE
jobs
SET
job_data = 'rebooted',
modified_date = ".$anvil->Database->quote($anvil->Database->refresh_timestamp)."
WHERE
job_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'job-uuid'})."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
# Register a job to reboot.
update_progress($anvil, 98, "message_0318");
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'anvil-manage-power'}." --reboot -y".$anvil->Log->switches,
job_data => "",
job_name => "reboot::system",
job_title => "job_0009",
job_description => "job_0006",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# Record that we're going to reboot now.
update_progress($anvil, 100, "message_0317");
}
else
{
# Record that a reboot is needed.
update_progress($anvil, 100, "message_0039");
}
}
else
{
update_progress($anvil, 100, "message_0040");
}
# Clear maintenance mode.
$anvil->System->maintenance_mode({set => 0});
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# This updates the progress if we were called with a job UUID.
sub update_progress
{
my ($anvil, $progress, $message) = @_;
# Log the progress percentage.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
progress => $progress,
message => $message,
"jobs::job_uuid" => $anvil->data->{jobs}{job_uuid},
}});
if ($anvil->data->{jobs}{job_uuid})
{
$anvil->Job->update_progress({
debug => 3,
'print' => 1,
progress => $progress,
message => $message,
job_uuid => $anvil->data->{jobs}{job_uuid},
});
}
return(0);
}
# This updates the OS.
sub run_os_update
{
my ($anvil, $try, $progress) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
try => $try,
progress => $progress,
}});
# This needs to be set to avoid warnings when called without a job-uuid.
$anvil->data->{sys}{last_update} = 0;
# Make sure that, if we're a node, we're out of the cluster.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
if ($host_type eq "node")
{
# Call anvil-safe-stop
update_progress($anvil, $progress++, "message_0314");
my $problem = $anvil->Cluster->parse_cib({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
if (not $problem)
{
# Call anvil-safe-stop
update_progress($anvil, $progress++, "message_0315");
my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-stop'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
if ($return_code)
{
# Something went wrong, abort.
update_progress($anvil, 100, "error_0420,!!return_code!".$return_code."!!,!!output!".$output."!!");
# Set the job_data to 'failed' so that striker-update-cluster' knows to abort.
if ($anvil->data->{switches}{'job-uuid'})
{
my $query = "
UPDATE
jobs
SET
job_data = 'failed',
modified_date = ".$anvil->Database->quote($anvil->Database->refresh_timestamp)."
WHERE
job_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'job-uuid'})."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0035", variables => { output => $output } });
$anvil->nice_exit({exit_code => 4});
}
}
}
# Should we clear the cache?
if ($anvil->data->{switches}{'clear-cache'})
{
# Yes.
my $shell_call = $anvil->data->{path}{exe}{dnf}." clean all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
update_progress($anvil, 5, "message_0316");
}
# NOTE: We run this directly to better monitor progress and update the progress.
my $package_changes = 0;
my $transaction_shown = 0;
my $success = 0;
my $to_update = 0;
my $percent_step = 0;
my $counted_lines = 0;
my $next_step = 0;
my $verifying = 0;
my $output = "";
my $shell_call = $anvil->data->{path}{exe}{dnf}." -y update; ".$anvil->data->{path}{exe}{echo}." return_code:\$?";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});
open (my $file_handle, $shell_call." 2>&1 |") or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
while(<$file_handle>)
{
chomp;
my $line = $_;
$output .= $line."\n";
$line = $anvil->Words->clean_spaces({string => $line});
$anvil->data->{counts}{lines}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "counts::lines" => $anvil->data->{counts}{lines}, line => $line }});
# If there were no updates, let the user know.
if ($line =~ /^Nothing to do/i)
{
update_progress($anvil, 95, "message_0057");
}
if ((not $verifying) && ($line =~ /^Verifying /i))
{
# Update done, verifying now.
$verifying = 1;
update_progress($anvil, $progress, "message_0038");
}
if ($line =~ /Running transaction/i)
{
# Done downloading
if (not $transaction_shown)
{
update_progress($anvil, $progress, "message_0037");
$transaction_shown = 1;
}
}
if ($line =~ /return_code:(\d+)$/)
{
my $return_code = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
if ($return_code == 0)
{
$success = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { success => $success }});
}
}
if ($line =~ / (\d+) Packages$/i)
{
my $counted_lines = $1;
$package_changes = $counted_lines;
$to_update += $counted_lines;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
package_changes => $package_changes,
counted_lines => $counted_lines,
to_update => $to_update,
}});
}
if ($line =~ /Total download size: (.*)$/i)
{
my $update_size = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update_size => $update_size }});
# Ready to install, update to 5%. The next step will count up to 95%.
update_progress($anvil, $progress, "message_0035,!!size!$update_size!!");
# The total (reliable) count of events is (to_update * 3), counting '(x/y): '
# (download), 'Upgrading '/'Installing ' and 'Verifying '. We ignore the scriplet
# and other lines as it's hard to predict how many there will be, and they pass fast
# enough to not really matter for a progress bar.
$to_update *= 4;
$percent_step = $anvil->Convert->round({number => ($to_update / 90)});
$next_step = $percent_step;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
to_update => $to_update,
percent_step => $percent_step,
next_step => $next_step,
}});
}
# If 'percent_step' is set, we're ready to start counting lines.
if (($percent_step) && (($line =~ /\(\d+\/\d+\): /) or ($line =~ /^Upgrading /i) or ($line =~ /^Installing /) or ($line =~ /^Cleanup /i) or ($line =~ /^Verifying /i)))
{
$counted_lines++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { counted_lines => $counted_lines }});
if ($line =~ /\(\d+\/\d+\): /)
{
$anvil->data->{counts}{downloaded}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "counts::downloaded" => $anvil->data->{counts}{downloaded} }});
}
if (($line =~ /^Upgrading /i) or ($line =~ /^Installing /))
{
$anvil->data->{counts}{installed}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "counts::installed" => $anvil->data->{counts}{installed} }});
}
if ($line =~ /^Verifying /i)
{
$anvil->data->{counts}{verified}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "counts::verified" => $anvil->data->{counts}{verified} }});
}
if ($counted_lines > $next_step)
{
# Step up the progress.
$next_step += $percent_step;
$progress++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
next_step => $next_step,
progress => $progress,
}});
$progress = 95 if $progress > 95;
update_progress($anvil, $progress, "");
}
}
# Update the progress if it's been more than a second since the last update.
if (time > $anvil->data->{sys}{last_update})
{
$progress = 95 if $progress > 95;
update_progress($anvil, $progress, "");
}
}
close $file_handle;
# If this is the first try and it failed, see if it's a DRBD issue.
if ((not $success) && ($try == 1))
{
# Is this the DRBD kmod issue?
my $remove_drbd_kmod = 0;
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /kmod-drbd/)
{
# Looks like it.
$remove_drbd_kmod = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remove_drbd_kmod => $remove_drbd_kmod }});
last;
}
}
# Clear the old kmod and try the update again.
if ($remove_drbd_kmod)
{
update_progress($anvil, $progress++, "message_0320");
my $versions_to_remove = "";
my $shell_call = $anvil->data->{path}{exe}{dnf}." list installed";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /(kmod-drbd-\d+.*?)\s/)
{
$versions_to_remove .= $1." ";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { versions_to_remove => $versions_to_remove }});
}
}
# Now remove those packages.
update_progress($anvil, $progress++, "message_0321");
$shell_call = $anvil->data->{path}{exe}{dnf}." -y remove ".$versions_to_remove;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
# Now install the new packages.
update_progress($anvil, $progress++, "message_0322");
$shell_call = $anvil->data->{path}{exe}{dnf}." -y install kmod-drbd";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
# Now try again.
update_progress($anvil, $progress++, "message_0323");
run_os_update($anvil, 2, $progress);
return(0);
}
}
# Reload daemons to pick up any changed systemctl daemons.
my ($systemctl_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{systemctl}." daemon-reload", source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { systemctl_output => $systemctl_output, return_code => $return_code }});
### See if the kernel has been updated.
# Get the newest installed kernel
$shell_call = $anvil->data->{path}{exe}{rpm}." -q kernel | ".$anvil->data->{path}{exe}{'sort'}." | ".$anvil->data->{path}{exe}{tail}." -n 1";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
(my $installed_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
installed_kernel => $installed_kernel,
return_code => $return_code,
}});
$installed_kernel =~ s/^kernel-(\d+.\d+\.\d+-\d+)\..*$/$1/;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { installed_kernel => $installed_kernel }});
# Get the running kernel
$shell_call = $anvil->data->{path}{exe}{uname}." -r";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
(my $active_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
active_kernel => $active_kernel,
return_code => $return_code,
}});
$active_kernel =~ s/(\d+.\d+\.\d+-\d+)\..*$/$1/;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { active_kernel => $active_kernel }});
if ($installed_kernel ne $active_kernel)
{
# Reboot needed
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
}
# If we installed and packages, and '--reboot' was given, reboot anyway.
if (($package_changes) && ($anvil->data->{switches}{reboot}))
{
# Reboot needed
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
}
# Did it work?
if (not $success)
{
# Nope.
update_progress($anvil, 0, "message_0036");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0035", variables => { output => $output } });
sleep 5;
$anvil->nice_exit({exit_code => 3});
}
return(0);
};