#!/usr/bin/perl
# 
# This updates the host system.
# NOTE: This doesn't update the Anvil! software stack yet, just the OS.
# 
# - On Striker; This will take the system offline and then run 'dnf -y update'.
# - On Nodes; This will do nothing until all servers are off the node. Then the node will be withdrawn, 
#   updated and then rejoin the cluster.
# - On DR; This will do nothing until no servers are running, then it will update the system.
# 
# In all cases, the system will be rebooted if the kernel is updated (unless '--no-reboot' is used, in which
# case the need to reboot is only recorded in the job progress).
# 
# Switches read by this program;
# --clear-cache = Run 'dnf clean all' before updating.
# --no-db       = Run without a database connection (any job UUID is ignored until the update is done).
# --no-reboot   = Don't register a reboot job, even if a reboot is needed.
# --reboot      = Flag a reboot as needed if any packages were changed, even if the kernel wasn't updated.
# --job-uuid    = The job to report progress against.
# 
# Exit codes;
# 0 = Normal exit.
# 1 = No database connections available.
# 2 = The job UUID was passed, but it wasn't valid.
# 3 = It looks like the update failed, reset progress to '0'.
# 4 = Failed to withdraw the node from the cluster.
# 
# TODO: 
# - Rebuild this to be 'striker-update-system' and have it update local strikers and all nodes.
# 

use strict;
use warnings;
use Anvil::Tools;

# Disable buffering
$| = 1;

my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

my $anvil = Anvil::Tools->new();

# Read the switches this program understands.
$anvil->Get->switches({list => [
	"clear-cache", 
	"no-db", 
	"no-reboot", 
	"reboot"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Connect to DBs.
if ($anvil->data->{switches}{'no-db'})
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "log_0743"});
	
	# If there was a job-uuid, clear it.
	$anvil->data->{sys}{database}{connections} = 0;
	$anvil->data->{switches}{'job-uuid'}       = "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		'sys::database::connections' => $anvil->data->{sys}{database}{connections}, 
		'switches::job-uuid'         => $anvil->data->{switches}{'job-uuid'}, 
	}});
}
else
{
	$anvil->Database->connect;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
	if (not $anvil->data->{sys}{database}{connections})
	{
		# No databases, exit.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"});
		$anvil->nice_exit({exit_code => 1});
	}
}

if ($anvil->data->{switches}{'job-uuid'})
{
	# See if another instance is running. If so, sleep for 10 seconds and then exit. The other instance 
	# could be the '--no-db' run we're about to clobber.
	my $pids = $anvil->System->pids({
		ignore_me    => 1,
		program_name => $THIS_FILE,
	});
	my $other_instances = @{$pids};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});
	
	if ($other_instances)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233", variables => { program => $THIS_FILE }});
		sleep 10;
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Load the job details. If anything is returned, there was a problem.
	my $return = $anvil->Job->get_job_details({job_uuid => $anvil->data->{switches}{'job-uuid'}});
	if ($return)
	{
		# It's not a UUID.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 'return' => $return }});
		$anvil->nice_exit({exit_code => 2});
	}
}

# Clear any old runs.
update_progress($anvil, 0, "clear");

# We'll keep a count of lines and packages to show the user.
$anvil->data->{counts}{downloaded} = 0;
$anvil->data->{counts}{installed}  = 0;
$anvil->data->{counts}{verified}   = 0;
$anvil->data->{counts}{lines}      = 0;

# Mark that we're starting
update_progress($anvil, 1, "message_0058,!!downloaded!0!!,!!installed!0!!,!!verified!0!!,!!lines!0!!");
update_progress($anvil, 2, "message_0033");

# Make sure maintenance mode is enabled.
$anvil->System->maintenance_mode({set => 1}) if $anvil->data->{sys}{database}{connections};

# Run the update
run_os_update($anvil, 1, 3);

# If we had no database, try to reconnect now that the update is done.
if (not $anvil->data->{sys}{database}{connections})
{
	# Start the anvil-daemon, the caller likely called without a DB because we're being updated by 
	# striker-update-cluster, and so there will be a job waiting for us.
	$anvil->System->enable_daemon({now => 1, daemon => "anvil-daemon"});
	$anvil->System->enable_daemon({now => 1, daemon => "scancore"});
	
	$anvil->Database->connect;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
	if ($anvil->data->{sys}{database}{connections})
	{
		# If there's a job for us waiting, mark it as almost done.
		my $query = "
SELECT 
    job_uuid 
FROM 
    jobs 
WHERE 
    job_command   LIKE '%".$THIS_FILE."%' 
AND 
    job_host_uuid =    ".$anvil->Database->quote($anvil->Get->host_uuid)." 
AND 
    job_progress  =    0
;";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
		
		# When no row matches, the lookup yields 'undef'. Normalize it so that the log call and the 
		# truth test below don't operate on an undefined value.
		my $job_uuid = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
		   $job_uuid = "" if not defined $job_uuid;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
		
		if ($job_uuid)
		{
			# From here on, update_progress() reports against this job (it falls back to 
			# 'switches::job-uuid' when 'jobs::job_uuid' isn't set).
			$anvil->data->{switches}{'job-uuid'} = $job_uuid;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, 
			}});
			
			update_progress($anvil, 0, "clear");
			update_progress($anvil, 90, "message_0324");
		}
	}
}

# We're done updating
my $reboot_needed = $anvil->System->reboot_needed({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
if ($reboot_needed)
{
	if (not $anvil->data->{switches}{'no-reboot'})
	{
		# Clear maintenance mode.
		$anvil->System->maintenance_mode({set => 0}) if $anvil->data->{sys}{database}{connections};
		
		# Record that we're rebooting so that 'striker-update-cluster' knows to wait for a reboot.
		if ($anvil->data->{switches}{'job-uuid'})
		{
			my $query = "
UPDATE 
    jobs 
SET 
    job_data      = 'rebooted', 
    modified_date = ".$anvil->Database->quote($anvil->Database->refresh_timestamp)." 
WHERE 
    job_uuid      = ".$anvil->Database->quote($anvil->data->{switches}{'job-uuid'})."
;";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
			$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
		}
		
		# Register a job to reboot.
		update_progress($anvil, 98, "message_0318");
		if ($anvil->data->{sys}{database}{connections})
		{
			my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
				file            => $THIS_FILE, 
				line            => __LINE__, 
				job_command     => $anvil->data->{path}{exe}{'anvil-manage-power'}." --reboot -y".$anvil->Log->switches, 
				job_data        => "", 
				job_name        => "reboot::system", 
				job_title       => "job_0009", 
				job_description => "job_0006", 
				job_progress    => 0, 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
		}
		
		# Record that we're going to reboot now.
		update_progress($anvil, 100, "message_0317");
	}
	else
	{
		# Record that a reboot is needed.
		update_progress($anvil, 100, "message_0039");
	}
}
else
{
	update_progress($anvil, 100, "message_0040");
}

# Clear maintenance mode.
$anvil->System->maintenance_mode({set => 0}) if $anvil->data->{sys}{database}{connections};

$anvil->nice_exit({exit_code => 0});


#############################################################################################################
# Functions                                                                                                 #
#############################################################################################################

# This updates the progress if we know which job we're running as.
# 
# Parameters;
# - $anvil    = The Anvil::Tools handle.
# - $progress = The progress percentage to record.
# - $message  = The message (string key, optionally with variables) to record. May be an empty string.
# 
# The job UUID is taken from 'jobs::job_uuid' when set, otherwise from 'switches::job-uuid'. The fallback 
# matters after a '--no-db' run reconnects to the database; in that path only 'switches::job-uuid' is set by
# this program. NOTE(review): 'jobs::job_uuid' is not set anywhere in this file, presumably 
# Job->get_job_details() loads it - confirm.
# 
# If neither is set, nothing is recorded. Always returns '0'.
sub update_progress
{
	my ($anvil, $progress, $message) = @_;
	
	my $job_uuid = "";
	if ($anvil->data->{jobs}{job_uuid})
	{
		$job_uuid = $anvil->data->{jobs}{job_uuid};
	}
	elsif ($anvil->data->{switches}{'job-uuid'})
	{
		$job_uuid = $anvil->data->{switches}{'job-uuid'};
	}
	
	# Log the progress percentage.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		progress => $progress,
		message  => $message,
		job_uuid => $job_uuid, 
	}});
	
	if ($job_uuid)
	{
		$anvil->Job->update_progress({
			debug    => 3, 
			'print'  => 1, 
			progress => $progress, 
			message  => $message, 
			job_uuid => $job_uuid,
		});
	}
	
	return(0);
}

# This updates the OS.
# 
# Parameters;
# - $anvil    = The Anvil::Tools handle.
# - $try      = The attempt number. If the first try (1) fails and the output mentions 'kmod-drbd', the 
#               installed kmod-drbd packages are removed, 'kmod-drbd' is reinstalled and this function calls
#               itself once more with '2'.
# - $progress = The progress percentage to start reporting from.
# 
# Returns '0'. Exits the program with '4' if 'anvil-safe-stop' fails on a node, and with '3' if the update 
# did not report a return code of '0'.
sub run_os_update
{
	my ($anvil, $try, $progress) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		try      => $try,
		progress => $progress, 
	}});
	
	# This needs to be set to avoid warnings when called without a job-uuid.
	$anvil->data->{sys}{last_update} = 0;
	
	# Make sure that, if we're a node, we're out of the cluster.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
	if ($host_type eq "node")
	{
		# Read the cluster's CIB. 'anvil-safe-stop' is only called when this reports no problem.
		# NOTE(review): presumably a problem here means the cluster isn't running locally - confirm.
		update_progress($anvil, $progress++, "message_0314");
		
		my $problem = $anvil->Cluster->parse_cib({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
		if (not $problem)
		{
			# Call anvil-safe-stop
			update_progress($anvil, $progress++, "message_0315");
			
			my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-stop'};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output, 
				return_code => $return_code, 
			}});
			if ($return_code)
			{
				# Something went wrong, abort.
				update_progress($anvil, 100, "error_0420,!!return_code!".$return_code."!!,!!output!".$output."!!");
				
				# Set the job_data to 'failed' so that 'striker-update-cluster' knows to abort.
				if ($anvil->data->{switches}{'job-uuid'})
				{
					my $query = "
UPDATE 
    jobs 
SET 
    job_data      = 'failed', 
    modified_date = ".$anvil->Database->quote($anvil->Database->refresh_timestamp)." 
WHERE 
    job_uuid      = ".$anvil->Database->quote($anvil->data->{switches}{'job-uuid'})."
;";
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
					$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
				}
				
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0035", variables => { output => $output } });
				$anvil->nice_exit({exit_code => 4});
			}
		}
	}
	
	# Should we clear the cache?
	if ($anvil->data->{switches}{'clear-cache'})
	{
		# Yes.
		my $shell_call = $anvil->data->{path}{exe}{dnf}." clean all";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output, 
			return_code => $return_code, 
		}});
		update_progress($anvil, 5, "message_0316");
	}
	
	# NOTE: We run this directly to better monitor progress and update the progress.
	my $package_changes   = 0;
	my $transaction_shown = 0;
	my $success           = 0;
	my $to_update         = 0;
	my $percent_step      = 0;
	my $counted_lines     = 0;
	my $next_step         = 0;
	my $verifying         = 0;
	my $output            = "";
	
	# The '2>&1' must be attached to the 'dnf' call itself. Placed at the end of the command string, it
	# would only redirect the trailing 'echo', and dnf's error messages (which the 'kmod-drbd' check 
	# below searches for) would never reach '$output'.
	my $shell_call = $anvil->data->{path}{exe}{dnf}." -y update 2>&1; ".$anvil->data->{path}{exe}{echo}." return_code:\$?";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});
	
	# Three-argument pipe open; the single command string contains shell metacharacters, so perl runs it
	# via the shell.
	if (not open (my $file_handle, "-|", $shell_call))
	{
		# We couldn't start the update. '$success' stays '0', so this is handled as a failed update
		# below instead of reading from an unopened handle.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
	}
	else
	{
		while (my $line = <$file_handle>)
		{
			chomp $line;
			$output .= $line."\n";
			$line   =  $anvil->Words->clean_spaces({string => $line});
			$anvil->data->{counts}{lines}++;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "counts::lines" => $anvil->data->{counts}{lines}, line => $line }});
			
			# If there were no updates, let the user know.
			if ($line =~ /^Nothing to do/i)
			{
				update_progress($anvil, 95, "message_0057");
			}
			
			if ((not $verifying) && ($line =~ /^Verifying /i))
			{
				# Update done, verifying now.
				$verifying = 1;
				update_progress($anvil, $progress, "message_0038");
			}
			
			if ($line =~ /Running transaction/i)
			{
				# Done downloading
				if (not $transaction_shown)
				{
					update_progress($anvil, $progress, "message_0037");
					$transaction_shown = 1;
				}
			}
			
			if ($line =~ /return_code:(\d+)$/)
			{
				my $return_code = $1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
				if ($return_code == 0)
				{
					$success = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { success => $success }});
				}
			}
			
			# The transaction summary reads 'Packages', or 'Package' when only one package is 
			# affected, so both forms are matched.
			if ($line =~ / (\d+) Packages?$/i)
			{
				my $package_count   =  $1;
				   $package_changes =  $package_count;
				   $to_update       += $package_count;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					package_changes => $package_changes,
					package_count   => $package_count,
					to_update       => $to_update, 
				}});
			}
			
			if ($line =~ /Total download size: (.*)$/i)
			{
				my $update_size = $1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update_size => $update_size }});
				
				# Ready to install, report the download size. The next steps will count up to 95%.
				update_progress($anvil, $progress, "message_0035,!!size!$update_size!!");
				
				# We estimate the count of events as (to_update * 4), counting '(x/y): ' 
				# (download), 'Upgrading '/'Installing ', 'Cleanup ' and 'Verifying '. We ignore 
				# the scriptlet and other lines as it's hard to predict how many there will be, 
				# and they pass fast enough to not really matter for a progress bar.
				$to_update    *= 4;
				$percent_step =  $anvil->Convert->round({number => ($to_update / 90)});
				
				# With few packages the step rounds down to '0', which would disable the line 
				# counting below entirely. Step at least once per counted line instead.
				if ((not $percent_step) or ($percent_step < 1))
				{
					$percent_step = 1;
				}
				$next_step = $percent_step;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					to_update    => $to_update,
					percent_step => $percent_step, 
					next_step    => $next_step, 
				}});
			}
			
			# If 'percent_step' is set, we're ready to start counting lines.
			if (($percent_step) && (($line =~ /\(\d+\/\d+\): /) or ($line =~ /^Upgrading /i) or ($line =~ /^Installing /) or ($line =~ /^Cleanup /i) or ($line =~ /^Verifying /i)))
			{
				$counted_lines++;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { counted_lines => $counted_lines }});
				
				if ($line =~ /\(\d+\/\d+\): /)
				{
					$anvil->data->{counts}{downloaded}++;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "counts::downloaded" => $anvil->data->{counts}{downloaded} }});
				}
				if (($line =~ /^Upgrading /i) or ($line =~ /^Installing /))
				{
					$anvil->data->{counts}{installed}++;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "counts::installed" => $anvil->data->{counts}{installed} }});
				}
				if ($line =~ /^Verifying /i)
				{
					$anvil->data->{counts}{verified}++;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "counts::verified" => $anvil->data->{counts}{verified} }});
				}
				
				if ($counted_lines > $next_step)
				{
					# Step up the progress.
					$next_step += $percent_step;
					$progress++;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
						next_step => $next_step,
						progress  => $progress, 
					}});
					$progress = 95 if $progress > 95;
					
					update_progress($anvil, $progress, "");
				}
			}
			
			# Update the progress if it's been more than a second since the last update.
			if (time > $anvil->data->{sys}{last_update})
			{
				$progress = 95 if $progress > 95;
				update_progress($anvil, $progress, "");
			}
		}
		close $file_handle;
	}
	
	# If this is the first try and it failed, see if it's a DRBD issue.
	if ((not $success) && ($try == 1))
	{
		# Is this the DRBD kmod issue?
		my $remove_drbd_kmod = 0;
		foreach my $line (split/\n/, $output)
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
			if ($line =~ /kmod-drbd/)
			{
				# Looks like it.
				$remove_drbd_kmod = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remove_drbd_kmod => $remove_drbd_kmod }});
				last;
			}
		}
		
		# Clear the old kmod and try the update again.
		if ($remove_drbd_kmod)
		{
			update_progress($anvil, $progress++, "message_0320");
			
			# Find the installed kmod-drbd packages.
			my $versions_to_remove = "";
			my $kmod_call          = $anvil->data->{path}{exe}{dnf}." list installed";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $kmod_call }});
			my ($kmod_output, $kmod_return_code) = $anvil->System->call({shell_call => $kmod_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $kmod_output, 
				return_code => $kmod_return_code, 
			}});
			foreach my $line (split/\n/, $kmod_output)
			{
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
				if ($line =~ /(kmod-drbd-\d+.*?)\s/)
				{
					$versions_to_remove .= $1." ";
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { versions_to_remove => $versions_to_remove }});
				}
			}
			
			# Now remove those packages.
			update_progress($anvil, $progress++, "message_0321");
			$kmod_call = $anvil->data->{path}{exe}{dnf}." -y remove ".$versions_to_remove;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $kmod_call }});
			($kmod_output, $kmod_return_code) = $anvil->System->call({shell_call => $kmod_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $kmod_output, 
				return_code => $kmod_return_code, 
			}});
			
			# Now install the new packages.
			update_progress($anvil, $progress++, "message_0322");
			$kmod_call = $anvil->data->{path}{exe}{dnf}." -y install kmod-drbd";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $kmod_call }});
			($kmod_output, $kmod_return_code) = $anvil->System->call({shell_call => $kmod_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $kmod_output, 
				return_code => $kmod_return_code, 
			}});
			
			# Now try again. The second try handles its own daemon reload, kernel check and 
			# success check, so we return as soon as it's done.
			update_progress($anvil, $progress++, "message_0323");
			run_os_update($anvil, 2, $progress);
			
			return(0);
		}
	}
	
	# Reload daemons to pick up any changed systemctl daemons.
	my ($systemctl_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{systemctl}." daemon-reload", source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { systemctl_output => $systemctl_output, return_code => $return_code }});
	
	### See if the kernel has been updated.
	# Get the newest installed kernel. A version sort ('-V') is needed here; a plain lexical sort would
	# put, for example, '-80' after '-147' and so pick an older kernel as the newest.
	$shell_call = $anvil->data->{path}{exe}{rpm}." -q kernel | ".$anvil->data->{path}{exe}{'sort'}." -V | ".$anvil->data->{path}{exe}{tail}." -n 1";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	(my $installed_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		installed_kernel => $installed_kernel, 
		return_code      => $return_code, 
	}});
	# Reduce 'kernel-<a>.<b>.<c>-<d>.<rest>' to '<a>.<b>.<c>-<d>'.
	$installed_kernel =~ s/^kernel-(\d+\.\d+\.\d+-\d+)\..*$/$1/;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { installed_kernel => $installed_kernel }});
	
	# Get the running kernel
	$shell_call = $anvil->data->{path}{exe}{uname}." -r";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	(my $active_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		active_kernel => $active_kernel, 
		return_code   => $return_code, 
	}});
	# Reduce the running kernel's release to the same '<a>.<b>.<c>-<d>' form for the comparison.
	$active_kernel =~ s/(\d+\.\d+\.\d+-\d+)\..*$/$1/;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { active_kernel => $active_kernel }});
	
	if ($installed_kernel ne $active_kernel)
	{
		# Reboot needed
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
		
		my $reboot_needed = $anvil->System->reboot_needed({set => 1});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
	}
	
	# If we installed any packages, and '--reboot' was given, reboot anyway.
	if (($package_changes) && ($anvil->data->{switches}{reboot}))
	{
		# Reboot needed
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
		
		my $reboot_needed = $anvil->System->reboot_needed({set => 1});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
	}
	
	# Did it work?
	if (not $success)
	{
		# Nope.
		update_progress($anvil, 0, "message_0036");
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0035", variables => { output => $output } });
		sleep 5;
		$anvil->nice_exit({exit_code => 3});
	}
	
	return(0);
}