From 9eec6c49779db0df9a28a75e1e3ced3e8b9aa53c Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 29 Nov 2021 22:43:23 -0500 Subject: [PATCH 1/5] * Created ScanCore->check_temperature_direct() based around that start logic from ScanCore->post_scan_analysis_striker() temperature check, and updated the later to use the former. * Updated the logic of when to boot a node or DR host that was found to be off for unknown reasons to require both poewr and temperature to be OK, and checks against the new 'feature::scancore::disable::boot-unknown-stop' config variable. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 4 +- Anvil/Tools/ScanCore.pm | 242 +++++++++++++++++++++++++++++----------- anvil.conf | 5 + share/words.xml | 7 +- 4 files changed, 190 insertions(+), 68 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 0d274766..3ee83f85 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -1363,7 +1363,7 @@ sub connect foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{database}}) { # Periodically, autovivication causes and empty key to appear. 
-		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uuid => $uuid }});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { uuid => $uuid }});
 		next if ((not $uuid) or (not $anvil->Validate->uuid({uuid => $uuid})));
 
 		if (($db_uuid) && ($db_uuid ne $uuid))
@@ -1387,7 +1387,7 @@ sub connect
 		my $name = $anvil->data->{database}{$uuid}{name};
 		my $user = $anvil->data->{database}{$uuid}{user};
 		my $password = $anvil->data->{database}{$uuid}{password};
-		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
 			host => $host,
 			port => $port,
 			name => $name,
diff --git a/Anvil/Tools/ScanCore.pm b/Anvil/Tools/ScanCore.pm
index 9b50f8c0..7e2d6600 100644
--- a/Anvil/Tools/ScanCore.pm
+++ b/Anvil/Tools/ScanCore.pm
@@ -21,6 +21,7 @@ my $THIS_FILE = "ScanCore.pm";
 # check_health
 # check_power
 # check_temperature
+# check_temperature_direct
 # count_servers
 # post_scan_analysis
 # post_scan_analysis_dr
@@ -1160,6 +1161,109 @@ ORDER BY
 }
 
 
+=head2 check_temperature_direct
+
+This calls a target's IPMI interface to check the temperature sensors that are available. The status is returned as;
+
+ 0 = Failed to read temperature sensors / IPMI unavailable
+ 1 = All available temperatures are nominal.
+ 2 = One or more sensors are in warning or critical.
+
+Parameters;
+
+=head3 host_uuid (Optional, default Get->host_uuid() )
+
+This is the host's UUID to look at.
+
+=cut
+sub check_temperature_direct
+{
+	my $self = shift;
+	my $parameter = shift;
+	my $anvil = $self->parent;
+	my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->check_temperature_direct()" }});
+
+	my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : $anvil->Get->host_uuid;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		host_uuid => $host_uuid,
+	}});
+
+	# * 0 - Failed to read temperature sensors / IPMI unavailable
+	# * 1 - All available temperatures are nominal
+	# * 2 - One or more sensors are in warning or critical.
+	my $status = 0;
+	if ((not defined $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_ipmi}) or (not $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_ipmi}))
+	{
+		$anvil->Database->get_hosts_info({debug => $debug});
+	}
+	my $host_ipmi = $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_ipmi};
+	my $host_name = $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_name};
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		host_ipmi => $host_ipmi,
+		host_name => $host_name,
+	}});
+
+	my ($ipmitool_command, $ipmi_password) = $anvil->Convert->fence_ipmilan_to_ipmitool({
+		debug => $debug,
+		fence_ipmilan_command => $host_ipmi,
+	});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		ipmitool_command => $ipmitool_command,
+		ipmi_password => $anvil->Log->is_secure($ipmi_password),
+	}});
+
+	if ((not $ipmitool_command) or ($ipmitool_command eq "!!error!!"))
+	{
+		# No IPMI tool to call.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0573", variables => { host_name => $host_name }});
+		return($status);
+	}
+
+	$anvil->System->collect_ipmi_data({
+		debug => $debug,
+		host_name => $host_name,
+		ipmitool_command => $ipmitool_command,
+		ipmi_password => $ipmi_password,
+	});
+
+	# Now look for thermal values.
+	foreach my $sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}})
+	{
+		my $current_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value};
+		my $units = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units};
+		my $sensor_status = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status};
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+			current_value => $current_value,
+			sensor_name => $sensor_name,
+			units => $units,
+			sensor_status => $sensor_status,
+		}});
+
+		# Only temperature sensors (units 'C') are evaluated. Any sensor that is not 'ok' marks the
+		# result as '2', and that result is never downgraded by an 'ok' sensor seen before or after it.
+		if ($units eq "C")
+		{
+			if ($sensor_status eq "ok")
+			{
+				# We've found at least one temperature sensor. Set status to '1' if not previously set
+				$status = 1 if not $status;
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status }});
+			}
+			else
+			{
+				# Sensor isn't OK. This must win even if an earlier sensor already set the status to '1'.
+				$status = 2;
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status }});
+			}
+		}
+	}
+
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status }});
+	return($status);
+}
+
+
 =head2 count_servers
 
 This returns the number of servers running on a given host, as reported by ScanCore (specifically, by counting the number of servers running on the host from the C<< servers >> table). It also counts the total amount of RAM in use by hosted servers.
@@ -2527,9 +2631,7 @@ LIMIT 1;"; if (not $stop_reason) { $stop_reason = "unknown"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { stop_reason => $stop_reason }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0565", variables => { host_name => $host_name }}); - } if ($stop_reason eq "user") @@ -2538,7 +2640,71 @@ LIMIT 1;"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0566", variables => { host_name => $host_name }}); next; } - elsif (($stop_reason eq "power") or ($stop_reason eq "unknown")) + elsif ($stop_reason eq "unknown") + { + # Check both power and temp. + if ((not defined $anvil->data->{feature}{scancore}{disable}{'boot-unknown-stop'}) or (not exists $anvil->data->{feature}{scancore}{disable}{'boot-unknown-stop'}) or ($anvil->data->{feature}{scancore}{disable}{'boot-unknown-stop'} eq "")) + { + $anvil->data->{feature}{scancore}{disable}{'boot-unknown-stop'} = 1; + } + if (not $anvil->data->{feature}{scancore}{disable}{'boot-unknown-stop'}) + { + # Ignore. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0671", variables => { host_name => $host_name }}); + } + else + { + # Evaluate for boot. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0672", variables => { host_name => $host_name }}); + + # Check power + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0567", variables => { host_name => $host_name }}); + my ($power_health, $shortest_time_on_batteries, $highest_charge_percentage, $estimated_hold_up_time) = $anvil->ScanCore->check_power({ + debug => $debug, + anvil_uuid => $anvil_uuid, + anvil_name => $anvil_name, + host_uuid => $host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + power_health => $power_health, + shortest_time_on_batteries => $shortest_time_on_batteries, + highest_charge_percentage => $highest_charge_percentage, + estimated_hold_up_time => $estimated_hold_up_time, + }}); + + # Check temp. + my ($temp_health) = $anvil->ScanCore->check_temperature_direct({ + debug => $debug, + host_uuid => $host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { temp_health => $temp_health }}); + + ### Temp + # * 0 = Failed to read temperature sensors / IPMI unavailable + # * 1 = All available temperatures are nominal. + # * 2 = One of more sensors are in warning or critical. + ### Power + # * 0 = No UPSes found for the host + # * 1 = One or more UPSes found and at least one has input power from mains. + # * 2 = One or more UPSes found, all are running on battery. + if (($temp_health eq "1") && ($power_health eq "1")) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0673", variables => { host_name => $host_name }}); + + $shell_call =~ s/--action status/ --action on/; + my ($output, $return_code) = $anvil->System->call({debug => $debug, timeout => 30, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + + # Mark it as booting. 
+ $anvil->Database->update_host_status({ + debug => $debug, + host_uuid => $host_uuid, + host_status => "booting", + }); + } + } + } + elsif ($stop_reason eq "power") { # Check now if the power is OK $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0567", variables => { host_name => $host_name }}); @@ -2590,72 +2756,20 @@ LIMIT 1;"; } elsif ($stop_reason eq "thermal") { - ### TODO: Switch to ->check_temperature() # Check now if the temperature is OK. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0568", variables => { host_name => $host_name }}); - - my ($ipmitool_command, $ipmi_password) = $anvil->Convert->fence_ipmilan_to_ipmitool({ - debug => 2, - fence_ipmilan_command => $host_ipmi, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - ipmitool_command => $ipmitool_command, - ipmi_password => $anvil->Log->is_secure($ipmi_password), - }}); - - if ((not $ipmitool_command) or ($ipmitool_command eq "!!error!!")) - { - # No IPMI tool to call. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0573", variables => { host_name => $host_name }}); - next; - } - - $anvil->System->collect_ipmi_data({ - host_name => $host_name, - ipmitool_command => $ipmitool_command, - ipmi_password => $ipmi_password, + my ($temp_health) = $anvil->ScanCore->check_temperature_direct({ + debug => $debug, + host_uuid => $host_uuid, }); - # Now look for thermal values. 
- my $sensor_found = 0; - my $temperatures_ok = 1; - foreach my $sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}}) - { - my $current_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value}; - my $units = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units}; - my $status = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - current_value => $current_value, - sensor_name => $sensor_name, - units => $units, - status => $status, - }}); - - # If this is a temperature, check to see if it is outside its nominal range and, if - # so, record it into a hash for loading into ScanCore's 'temperature' table. - if ($units eq "C") - { - if (not $sensor_found) - { - # We've found at least one temperature sensor. - $sensor_found = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { sensor_found => $sensor_found }}); - } - - if ($status ne "ok") - { - # Sensor isn't OK yet. - $temperatures_ok = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { temperatures_ok => $temperatures_ok }}); - } - } - } + ### Temp + # * 0 = Failed to read temperature sensors / IPMI unavailable + # * 1 = All available temperatures are nominal. + # * 2 = One of more sensors are in warning or critical. 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { temp_health => $temp_health }}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - sensor_found => $sensor_found, - temperatures_ok => $temperatures_ok, - }}); - if (($sensor_found) && ($temperatures_ok)) + if ($temp_health eq "1") { ### TODO: We'll want to revisit M2's restart cooldown logic. It never ### actually proved useful in M2, but it doesn't mean it wouldn't help diff --git a/anvil.conf b/anvil.conf index a292cebe..15eb02e8 100644 --- a/anvil.conf +++ b/anvil.conf @@ -11,6 +11,11 @@ sys::privacy::strong = 0 # feature, set this to '1'. feature::scancore::disable::preventative-live-migration = 0 +# If a node is found to be powered off, and there is no reason recorded in the database, it will be booted. +# The assumption is that an accidental power off occurred. If you would like to have nodes that power off +# stay off until manually started, set this to '0' +#feature::scancore::disable::boot-unknown-stop = 1 + ### Database # Database connections; # diff --git a/share/words.xml b/share/words.xml index 91ff0d07..1c75d45f 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1947,7 +1947,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: The host: [#!variable!host_name!#] is up, no need to check if it needs booting. The host: [#!variable!host_name!#] couldn't be reached directly, but IPMI reports that it is up. Could the IPMI BMC be hung or unplugged? The host: [#!variable!host_name!#] is off. Will check now if it should be booted. - The host: [#!variable!host_name!#] has no stop reason, so we'll boot it up in case it lost power without warning. + The host: [#!variable!host_name!#] has no stop reason, so we'll check to see if we should power it on, in case it lost power or overheated without warning. The host: [#!variable!host_name!#] was stopped by the user, so we'll leave it off. 
The host: [#!variable!host_name!#] was powered off because of power loss. Checking to see if it is now safe to restart it. The host: [#!variable!host_name!#] was powered off because of thermal issues. Checking to see if it is now safe to restart it. @@ -1955,7 +1955,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Unable to parse the install manifest uuid: [#!variable!manifest_uuid!#] for the Anvil! [#!variable!anvil_name!#]. As such, unable to determine what UPSes power the machine: [#!variable!host_name!#]. Unable to determine if the power feeding this node is OK or not. The UPS referenced by the 'power_uuid': [#!variable!power_uuid!#] under the host: [#!variable!host_name!#] has no record of being on mains power, so we can't determine how long it's been on batteries. Setting the "shortest time on batteries" to zero seconds. Marking the host as 'online' and clearing the host's stop reason. - The host: [#!variable!host_name!#] is off, but there appears to be a problem translating the 'fence_ipmilan' into a workable 'ipmitool' command. Unable to check the thermal data of the host, and so, unable to determine if it's safe to boot the node. + There appears to be a problem translating the 'fence_ipmilan' into a workable 'ipmitool' command for the host: [#!variable!host_name!#]. Unable to check the thermal data of the host. The host: [#!variable!host_name!#] was powered off because of power loss. Power is back and the UPSes are sufficiently charged. Booting it back up now. The host: [#!variable!host_name!#] was powered off for thermal reasons. All available thermal sensors read as OK now. Booting it back up now. The file: [#!variable!file_path!#] isn't on (or isn't the right size on) Striker: [#!variable!host_name!#]. Not using it to pull from. @@ -2061,6 +2061,9 @@ The file: [#!variable!file!#] needs to be updated. The difference is: No password for the database on the host with UUID: [#!variable!uuid!#], skipping it. 
The firewalld daemon isn't running, skipping firewall setup. The postgresql server is installed. + The host: [#!variable!host_name!#] was powered off for an unknown reason, and 'feature::scancore::disable::boot-unknown-stop' is set to: [#!data!feature::scancore::disable::boot-unknown-stop!#]. Will not boot this host. + The host: [#!variable!host_name!#] was powered off for an unknown reason, and 'feature::scancore::disable::boot-unknown-stop' is set to: [#!data!feature::scancore::disable::boot-unknown-stop!#]. If power and temperature looks good, we'll boot it. + The host: [#!variable!host_name!#] has good power and temperature readings. Booting it back up now. The host name: [#!variable!target!#] does not resolve to an IP address. From 65dfc22a38774a96c6cd6df320d31293b360bd3b Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 30 Nov 2021 17:58:29 -0500 Subject: [PATCH 2/5] Added an eval{} call around Database->query()'s ->prepare() DBI call to better handle lost database handle. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 26 +++++++++++++++++++++++--- share/words.xml | 2 ++ tools/anvil-daemon | 3 +-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 3ee83f85..2dfadcb9 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -803,7 +803,7 @@ sub configure_pgsql # Make sure we have an entry in our own anvil.conf. my $local_uuid = $anvil->Database->get_local_uuid(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_uuid => $local_uuid }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { local_uuid => $local_uuid }}); # If we didn't get the $local_uuid, then there is no entry for this system in anvil.conf yet, so we'll add it. if (not $local_uuid) @@ -15393,12 +15393,27 @@ sub query } # Do the query. 
- my $DBreq = $anvil->data->{cache}{database_handle}{$uuid}->prepare($query) or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0075", variables => { + local $@; + my $DBreq = eval { $anvil->data->{cache}{database_handle}{$uuid}->prepare($query) or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0075", variables => { query => (not $secure) ? $query : $anvil->Log->is_secure($query), server => $say_server, db_error => $DBI::errstr, + }}); }; + if ($@) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0675", variables => { + query => (not $secure) ? $query : $anvil->Log->is_secure($query), + server => $say_server, + eval_error => $@, }}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { DBreq => $DBreq }}); + return("!!error!!"); + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + uuid => $uuid, + query => (not $secure) ? $query : $anvil->Log->is_secure($query), + say_server => $say_server, + DBreq => $DBreq, + }}); # Execute on the query $DBreq->execute() or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0076", variables => { @@ -15667,6 +15682,7 @@ sub resync_databases ### to avoid trouble with primary/foreign keys. # We're going to use the array of tables assembles by _find_behind_databases() stored in # 'sys::database::check_tables' + my $start_time = time; foreach my $table (@{$anvil->data->{sys}{database}{check_tables}}) { # We don't sync 'states' as it's transient and sometimes per-DB. 
@@ -16117,6 +16133,10 @@ sub resync_databases $anvil->data->{sys}{database}{resync_needed} = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 'sys::database::resync_needed' => $anvil->data->{sys}{database}{resync_needed} }}); + my $time_taken = time - $start_time; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { time_taken => $time_taken }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0674", variables => { took => $time_taken }}); + return(0); } diff --git a/share/words.xml b/share/words.xml index 1c75d45f..ba99971f 100644 --- a/share/words.xml +++ b/share/words.xml @@ -2064,6 +2064,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is: The host: [#!variable!host_name!#] was powered off for an unknown reason, and 'feature::scancore::disable::boot-unknown-stop' is set to: [#!data!feature::scancore::disable::boot-unknown-stop!#]. Will not boot this host. The host: [#!variable!host_name!#] was powered off for an unknown reason, and 'feature::scancore::disable::boot-unknown-stop' is set to: [#!data!feature::scancore::disable::boot-unknown-stop!#]. If power and temperature looks good, we'll boot it. The host: [#!variable!host_name!#] has good power and temperature readings. Booting it back up now. + The resync has completed in: [#!variable!took!#] second(s). + Log->secure' is not set. ]]> The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 29142c78..2db260ce 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -92,7 +92,6 @@ $anvil->System->_check_anvil_conf(); # Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks # is to setup the database server. 
$anvil->Database->connect({ - debug => 2, check_if_configured => 1, check_for_resync => 1, }); @@ -109,7 +108,7 @@ if (not $anvil->data->{sys}{database}{connections}) prep_database($anvil); # Try connecting again - $anvil->Database->connect({debug => 2, check_if_configured => 1, check_for_resync => 1}); + $anvil->Database->connect({check_if_configured => 1, check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { From 9cfd7b9b94319c7ea938a3ba18fc3e10a99b9f74 Mon Sep 17 00:00:00 2001 From: Digimer Date: Wed, 1 Dec 2021 18:43:50 -0500 Subject: [PATCH 3/5] Created the new (and still in development) striker-file-manager to manage files from a Striker dashboard's command line. So far. it will add files only. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 2 + tools/anvil-sync-shared | 2 +- tools/striker-file-manager | 318 ++++++++++++++++++++++++++++ tools/striker-manage-install-target | 2 +- 4 files changed, 322 insertions(+), 2 deletions(-) create mode 100755 tools/striker-file-manager diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 2dfadcb9..9bda9807 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -15401,6 +15401,8 @@ sub query }}); }; if ($@) { + ### TODO: Report back somehow that the handle is dead. + $anvil->Database->disconnect({debug => $debug}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0675", variables => { query => (not $secure) ? 
$query : $anvil->Log->is_secure($query),
 			server => $say_server,
diff --git a/tools/anvil-sync-shared b/tools/anvil-sync-shared
index d88294fe..a1cb6965 100755
--- a/tools/anvil-sync-shared
+++ b/tools/anvil-sync-shared
@@ -48,7 +48,7 @@ if (not $anvil->data->{switches}{'job-uuid'})
 	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }});
 }
 
-# If we still don't have a job-uuit, go into interactive mode.
+# If we still don't have a job-uuid, go into interactive mode.
 if ($anvil->data->{switches}{'job-uuid'})
 {
 	# Load the job data.
diff --git a/tools/striker-file-manager b/tools/striker-file-manager
new file mode 100755
index 00000000..aed0d50c
--- /dev/null
+++ b/tools/striker-file-manager
@@ -0,0 +1,318 @@
+#!/usr/bin/perl
+#
+# This is the command line user interface for managing files on /mnt/shared/files on Strikers and made
+# available on Anvil! systems.
+#
+
+use strict;
+use warnings;
+use Anvil::Tools;
+use Data::Dumper;
+require POSIX;
+use Term::Cap;
+
+my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
+my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
+if (($running_directory =~ /^\./) && ($ENV{PWD}))
+{
+	$running_directory =~ s/^\./$ENV{PWD}/;
+}
+
+# Turn off buffering so that prompts are displayed before we block waiting on user input.
+$| = 1;
+
+my $anvil = Anvil::Tools->new();
+
+# Read the command line switches.
+$anvil->Get->switches;
+$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
+
+# Connect to the database(s).
+$anvil->Database->connect;
+$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"});
+
+
+my $termios = POSIX::Termios->new;
+$termios->getattr;
+my $ospeed = $termios->getospeed;
+
+my $terminal = Term::Cap->Tgetent({ TERM => undef, OSPEED => $ospeed });
+$terminal->Trequire(qw/ce ku kd/);
+
+interactive_menu($anvil, $termios);
+
+$anvil->nice_exit({exit_code => 0});
+
+
+
+#############################################################################################################
+# Functions                                                                                                 #
+#############################################################################################################
+# Shows the main menu in a loop and dispatches to the sub-menus. Sub-menus return here to go "back".
+sub interactive_menu
+{
+	my ($anvil, $termios) = @_;
+
+	# This has to run on a striker, so is this a Striker?
+	my $host_type = $anvil->Get->host_type;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
+	if ($host_type ne "striker")
+	{
+		print "Managing files must be run on a Striker dashboard. Exiting\n";
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	$anvil->data->{managing}{file} = "";
+	$anvil->data->{managing}{anvil} = "";
+	while(1)
+	{
+		# Get a list of files we already know about. Database->get_anvils() also loads files and
+		# file_locations data
+		$anvil->Database->get_anvils;
+		my $longest_file_name = 0;
+
+		print $terminal->Tputs('cl');
+		print "-=] Anvil! File Management\n\n";
+		# Show the main menu. If STDIN is closed (EOF), the answer defaults to 'q' so we exit cleanly.
+		print "[ 1 ] - Add a new file.\n";
+		print "[ 2 ] - Manage an existing file.\n";
+		print "[ 3 ] - Manage files on an Anvil!\n";
+		print "\n";
+		print "[ Q ] - Quit\n";
+		print "\n";
+		print $terminal->Tgoto('cm', 0, 8)."? ";
+		my $answer = <STDIN> // "q";
+		chomp $answer;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }});
+
+		if ($answer eq "1")
+		{
+			interactive_menu_add_file($anvil, $terminal);
+		}
+		elsif ($answer eq "2")
+		{
+			interactive_menu_manage_file($anvil, $terminal);
+		}
+		elsif ($answer eq "3")
+		{
+			interactive_menu_manage_anvil($anvil, $terminal);
+		}
+		elsif (lc($answer) eq "q")
+		{
+			print "NO CARRIER, good bye.\n";
+			$anvil->nice_exit({exit_code => 0});
+		}
+	}
+
+	return(0);
+}
+# Lists files in the incoming directory that are new and queues a job to add the one the user picks.
+sub interactive_menu_add_file
+{
+	my ($anvil, $terminal) = @_;
+
+	print $terminal->Tputs('cl');
+	print "-=] Anvil! File Management - Add a new file\n\n";
+
+	# Build a list of files in /mnt/shared/incoming/ that are not yet in the database.
+	get_file_list($anvil);
+
+	# Start the array with an empty entry so that users can answer '1' for the first file.
+	my $files = [""];
+	foreach my $file_name (sort {$a cmp $b} keys %{$anvil->data->{manage_files}})
+	{
+		if ($anvil->data->{manage_files}{$file_name}{file_new})
+		{
+			push @{$files}, $file_name;
+		}
+	}
+	my $file_count = (@{$files} - 1);
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_count => $file_count }});
+
+	if (not $file_count)
+	{
+		print "<No new files found in: [".$anvil->data->{path}{directories}{shared}{incoming}."]>\n";
+		# Show the navigation options. Returning takes the user back to the main menu loop.
+		print "\n";
+		print "[ B ] - Back\n";
+		print "[ Q ] - Quit\n";
+		print "\n";
+		print $terminal->Tgoto('cm', 0, 7)."? ";
+		my $answer = <STDIN> // "q";
+		chomp $answer;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }});
+		if (lc($answer) eq "b")
+		{
+			return(0);
+		}
+		elsif (lc($answer) eq "q")
+		{
+			print "NO CARRIER, good bye.\n";
+			$anvil->nice_exit({exit_code => 0});
+		}
+		else
+		{
+			interactive_menu_add_file($anvil, $terminal);
+		}
+	}
+	else
+	{
+		my $pad = 1;
+		if ($file_count > 9)
+		{
+			$pad = 2;
+		}
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pad => $pad }});
+
+		foreach my $i (0..$file_count)
+		{
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "files->[".$i."]" => $files->[$i] }});
+			next if $files->[$i] eq "";
+			my $file_name = $files->[$i];
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_name => $file_name }});
+			print "[ ".sprintf("%${pad}s", $i)." ] - ".$file_name."\n";
+		}
+		print "\n";
+		print "[ B ] - Back\n";
+		print "[ Q ] - Quit\n";
+		print "\n";
+		print $terminal->Tgoto('cm', 0, ($file_count + 6))."Which file would you like to add? ";
+		my $answer = <STDIN> // "q";
+		chomp $answer;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }});
+		if (($answer =~ /^\d+$/) && (exists $files->[$answer]) and ($files->[$answer]))
+		{
+			my $file_name = $files->[$answer];
+			print $terminal->Tputs('cl');
+			print "-=] Anvil! File Management - Add a new file\n\n";
+			print "Confirm addition of: [".$file_name."] [y/N] ?\n";
+			my $answer = <STDIN> // "n";
+			chomp $answer;
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }});
+			if ($answer =~ /^y/i)
+			{
+				print "Creating a job to add the file. Please be patient, it should be added shortly.\n";
+				my $out_file = $anvil->data->{path}{directories}{shared}{incoming}."/".$file_name;
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { out_file => $out_file }});
+
+				my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
+					file => $THIS_FILE,
+					line => __LINE__,
+					job_command => $anvil->data->{path}{exe}{'anvil-sync-shared'},
+					job_data => "file=".$out_file,
+					job_name => "storage::move_incoming",
+					job_title => "job_0132",
+					job_description => "job_0133",
+					job_progress => 0,
+					job_host_uuid => $anvil->data->{sys}{host_uuid},
+				});
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
+				print "- Job created as: [".$job_uuid."]\n\n";
+				print "Press any key to return to the main menu.\n";
+				my $answer = <STDIN> // "";
+				chomp $answer;
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }});
+				return(0);
+			}
+			else
+			{
+				print "Aborted.\n";
+				sleep 1;
+				interactive_menu_add_file($anvil, $terminal);
+			}
+		}
+		elsif (lc($answer) eq "b")
+		{
+			return(0);
+		}
+		elsif (lc($answer) eq "q")
+		{
+			print "NO CARRIER, good bye.\n";
+			$anvil->nice_exit({exit_code => 0});
+		}
+		else
+		{
+			interactive_menu_add_file($anvil, $terminal);
+		}
+	}
+
+	return(0);
+}
+# Placeholder for managing an existing file; not implemented yet, so it simply exits.
+sub interactive_menu_manage_file
+{
+	my ($anvil, $terminal) = @_;
+
+	print $terminal->Tputs('cl');
+	print "-=] Anvil! File Management - Manage an existing file\n\n";
+
+	if (0)
+	{
+	}
+	else
+	{
+		print "NO CARRIER, good bye.\n";
+		$anvil->nice_exit({exit_code => 0});
+	}
+
+	return(0);
+}
+# Placeholder for managing the files on an Anvil!; not implemented yet, so it simply exits.
+sub interactive_menu_manage_anvil
+{
+	my ($anvil, $terminal) = @_;
+
+
+	print $terminal->Tputs('cl');
+	print "-=] Anvil! File Management - Manage Files on an Anvil!\n\n";
+
+	if (0)
+	{
+	}
+	else
+	{
+		print "NO CARRIER, good bye.\n";
+		$anvil->nice_exit({exit_code => 0});
+	}
+
+	return(0);
+}
+
+# This looks for files in /mnt/shared/incoming and collects their file_uuid, if found in the database.
+sub get_file_list
+{
+	my ($anvil) = @_;
+
+	if (exists $anvil->data->{manage_files})
+	{
+		delete $anvil->data->{manage_files};
+	}
+	my $directory = $anvil->data->{path}{directories}{shared}{incoming};
+	local(*DIRECTORY);
+	opendir(DIRECTORY, $directory) or return(1);	# Nothing to list if the directory can't be read.
+	while(my $file_name = readdir(DIRECTORY))
+	{
+		next if $file_name eq ".";
+		next if $file_name eq "..";
+		# Ignore hidden files (which includes files still being copied)
+		next if $file_name =~ /^\./;
+		my $full_path = $directory."/".$file_name;
+
+		# No file should match, but just in case...
+		if (exists $anvil->data->{files}{file_name}{$file_name})
+		{
+			### TODO: Log that this is a duplicate.
+			$anvil->data->{manage_files}{$file_name}{file_uuid} = $anvil->data->{files}{file_name}{$file_name}{file_uuid};
+			$anvil->data->{manage_files}{$file_name}{file_new} = 0;
+
+		}
+		else
+		{
+			$anvil->data->{manage_files}{$file_name}{file_uuid} = "";
+			$anvil->data->{manage_files}{$file_name}{file_new} = 1;
+		}
+	}
+	closedir(DIRECTORY);
+
+	return(0);
+}
diff --git a/tools/striker-manage-install-target b/tools/striker-manage-install-target
index 6fa6f859..cbf97873 100755
--- a/tools/striker-manage-install-target
+++ b/tools/striker-manage-install-target
@@ -176,7 +176,7 @@ if ($anvil->data->{switches}{disable})
 
 # Exit if we're not configured yet
 my $configured = $anvil->System->check_if_configured;
-$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { configured => $configured }});
+$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
 if (not $configured)
 {
 	print $anvil->Words->string({key => "error_0046"})."\n";
From 
cc4c3bd3e36e8c7d7dce5c2427d748ec2b415be2 Mon Sep 17 00:00:00 2001 From: Digimer Date: Thu, 2 Dec 2021 12:41:21 -0500 Subject: [PATCH 4/5] Added ssh askpass for Striker so VMM can ask to confirm fingerprints. Signed-off-by: Digimer --- anvil.spec.in | 1 + 1 file changed, 1 insertion(+) diff --git a/anvil.spec.in b/anvil.spec.in index 20e074fc..bdf7b6ad 100644 --- a/anvil.spec.in +++ b/anvil.spec.in @@ -131,6 +131,7 @@ Requires: gdm Requires: gnome-terminal Requires: httpd Requires: nmap +Requires: openssh-askpass Requires: postgresql-server Requires: syslinux Requires: syslinux-nonlinux From 3346d31194f9563b091ed7f9a0c0f9c9a839ebb6 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 7 Dec 2021 20:03:39 -0500 Subject: [PATCH 5/5] * Created Get->kernel_release() that returns the current kernel release (version) in use on the host or on a remote system. * Created DRBD->_initialize_kmod() to make sure the DRBD kernel module can load and tries to build the module, if necessary. This is meant to provide support for clients that can't access needed internet resources (or the internet at all). 
Signed-off-by: Digimer --- Anvil/Tools.pm | 2 + Anvil/Tools/DRBD.pm | 113 ++++++++++++++++++++++++++++++++++++++++++ Anvil/Tools/Get.pm | 89 +++++++++++++++++++++++++++++++++ Anvil/Tools/System.pm | 1 + share/words.xml | 6 +++ tools/anvil-daemon | 6 ++- 6 files changed, 216 insertions(+), 1 deletion(-) diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index 16d4ad8c..27f7540b 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1102,6 +1102,7 @@ sub _set_paths units => "/usr/lib/systemd/system", }, exe => { + akmods => "/usr/sbin/akmods", 'alteeve-repo-setup' => "/usr/sbin/alteeve-repo-setup", 'anvil-boot-server' => "/usr/sbin/anvil-boot-server", 'anvil-change-password' => "/usr/sbin/anvil-change-password", @@ -1197,6 +1198,7 @@ sub _set_paths md5sum => "/usr/bin/md5sum", 'mkdir' => "/usr/bin/mkdir", modifyrepo_c => "/usr/bin/modifyrepo_c", + modprobe => "/usr/sbin/modprobe", mv => "/usr/bin/mv", nmap => "/usr/bin/nmap", nmcli => "/bin/nmcli", diff --git a/Anvil/Tools/DRBD.pm b/Anvil/Tools/DRBD.pm index 263579b9..247be8e4 100644 --- a/Anvil/Tools/DRBD.pm +++ b/Anvil/Tools/DRBD.pm @@ -26,6 +26,7 @@ my $THIS_FILE = "DRBD.pm"; # reload_defaults # resource_uuid # update_global_common +# _initialize_kmod # =pod @@ -1835,6 +1836,7 @@ sub get_status return(0); } + =head2 manage_resource This takes a task, C<< up >>, C<< down >>, C<< primary >>, or C<< secondary >> and a resource name and acts on the request. @@ -3029,3 +3031,114 @@ sub update_global_common ############################################################################################################# # Private functions # ############################################################################################################# + +=head2 _initialize_kmod + +This checks to see if the C<< drbd >> kernel module can load. If not, a check is made to see if an RPM that matches the kernel exists. If so, it is installed. If not, C<< akmods >> is asked to build and install the drbd kernel module. 
+ +Returns C<< 0 >> if the module loads or is already loaded. C<< !!error!! >> if not. + +=cut +sub _initialize_kmod +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "DRBD->_initialize_kmod()" }}); + + my $kernel_release = $anvil->Get->kernel_release({debug => $debug}); + my $shell_call = $anvil->data->{path}{exe}{modprobe}." drbd"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + kernel_release => $kernel_release, + shell_call => $shell_call, + }}); + + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + + if (not $return_code) + { + # Loaded fine + return(0); + } + else + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0676"}); + my $install = 0; + my $shell_call = $anvil->data->{path}{exe}{dnf}." -q search kmod-drbd-".$kernel_release; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + foreach my $line (split/\n/, $output) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }}); + if ($line =~ /Name Exactly/) + { + # We can install. 
+ $install = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { install => $install }}); + last; + } + } + + # Install or build? + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { install => $install }}); + if ($install) + { + ### TODO: Should this be a background process? + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0677"}); + my $shell_call = $anvil->data->{path}{exe}{dnf}." -y install kmod-drbd-".$kernel_release; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + } + else + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0678"}); + my $shell_call = $anvil->data->{path}{exe}{akmods}." --force"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + } + + # In either case, try again. + $output = undef; + $return_code = undef; + $shell_call = $anvil->data->{path}{exe}{modprobe}." 
drbd"; + ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + + if (not $return_code) + { + # Loaded fine + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0679"}); + return(0); + } + else + { + # Failed + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "warning_0132"}); + } + } + + return('!!error!!'); +} diff --git a/Anvil/Tools/Get.pm b/Anvil/Tools/Get.pm index 19564b66..187eef77 100644 --- a/Anvil/Tools/Get.pm +++ b/Anvil/Tools/Get.pm @@ -31,6 +31,7 @@ my $THIS_FILE = "Get.pm"; # host_uuid_from_name # host_type # host_uuid +# kernel_release # md5sum # os_type # server_uuid_from_name @@ -1777,6 +1778,94 @@ sub host_uuid return($anvil->{HOST}{UUID}); } + +=head2 kernel_release + +This returns the kernel release (same output as C<< uname --kernel-release >>) on the local or remote host. If there is a problem, C<< !!error!! >> is returned. + +Parameters; + +=head3 password (optional) + +This is the password to use when connecting to a remote machine. If not set, but C<< target >> is, an attempt to connect without a password will be made. + +=head3 port (optional) + +This is the TCP port to use when connecting to a remote machine. If not set, but C<< target >> is, C<< 22 >> will be used. + +=head3 remote_user (optional, default root) + +If C<< target >> is set, this will be the user we connect to the remote machine as. + +=head3 target (optional) + +This is the IP or host name of the machine to read the kernel release. If this is not set, the local system's kernel release is checked. + +=cut +sub kernel_release +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? 
$parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Get->kernel_release()" }}); + + my $password = defined $parameter->{password} ? $parameter->{password} : ""; + my $port = defined $parameter->{port} ? $parameter->{port} : ""; + my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root"; + my $target = defined $parameter->{target} ? $parameter->{target} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + target => $target, + port => $port, + remote_user => $remote_user, + password => $anvil->Log->is_secure($password), + }}); + + my $kernel_release = ""; + my $return_code = ""; + my $shell_call = $anvil->data->{path}{exe}{uname}." --kernel-release"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + if ($anvil->Network->is_local({host => $target})) + { + # Local call. NOTE(review): unlike the remote branch below, a non-zero return code is only logged here, it is not checked. + ($kernel_release, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + kernel_release => $kernel_release, + return_code => $return_code, + }}); + } + else + { + # Remote call + ($kernel_release, my $error, $return_code) = $anvil->Remote->call({ + debug => $debug, + shell_call => $shell_call, + target => $target, + port => $port, + password => $password, + remote_user => $remote_user, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + kernel_release => $kernel_release, + error => $error, + return_code => $return_code, + }}); + + if ($return_code) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "err", key => "error_0356", variables => { + target => $target, + output => $kernel_release, + return_code => $return_code, + }}); + 
$kernel_release = "!!error!!"; + } + } + + return($kernel_release); +} + + =head2 md5sum This returns the C<< md5sum >> of a given file. diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index e4483c6c..23004517 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -3152,6 +3152,7 @@ sub host_name return($host_name, $descriptive); } + =head2 maintenance_mode This sets, clears or checks if the local system is in maintenance mode. Any system in maintenance mode will not be used by normal Anvil! tasks. diff --git a/share/words.xml b/share/words.xml index ba99971f..54e38203 100644 --- a/share/words.xml +++ b/share/words.xml @@ -501,6 +501,7 @@ The output, if any, was; ==== Failed to load the database file: [#!variable!file!#]. Deleting it so it's not considered in the next load attempt. + Failed to read the kernel release on the host: [#!variable!target!#]. The return code was: [#!variable!return_code!#] (expected '0') and the release output, if any, was: [#!variable!output!#]. @@ -2066,6 +2067,10 @@ The file: [#!variable!file!#] needs to be updated. The difference is: The host: [#!variable!host_name!#] has good power and temperature readings. Booting it back up now. The resync has completed in: [#!variable!took!#] second(s). Log->secure' is not set. ]]> + [ Note ] - The DRBD kernel module failed to load. It is possible the kernel was updated. We will check to see if we can install a pre-built RPM, or if we need to build one ourselves. + Found an installable DRBD kernel module RPM that matches the current kernel. Installing it now. + [ Note ] - We need to build the DRBD kernel module. This can take a few minutes, please be patient! Use 'journalctl -f' to monitor the build process. + Successfully built and installed the new DRBD kernel module! The host name: [#!variable!target!#] does not resolve to an IP address. @@ -3091,6 +3096,7 @@ We will sleep a bit and try again. 
[ Warning ] - The storage group: [#!variable!storage_group_name!#] had the host: [#!variable!host_name!#] as a member. This host is not a member (anymore?) of the Anvil!: [#!variable!anvil_name!#]. Removing it from the storage group now. [ Warning ] - The postgresql server is not installed yet. Sleeping for a bit, then will check again. + [ Warning ] - Failed to build or install the DRBD kernel module! It is very likely that this machine will not be able to run any servers until this is fixed. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 2db260ce..8b1aa0b0 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -1126,11 +1126,12 @@ sub handle_special_cases { my ($anvil) = @_; - # RHBZ #1961562 - https://bugzilla.redhat.com/show_bug.cgi?id=1961562#c16 my $host_type = $anvil->Get->host_type(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }}); if ($host_type ne "striker") { + ### TODO: Test that this is fixed. The bug is now ERRATA + # RHBZ #1961562 - https://bugzilla.redhat.com/show_bug.cgi?id=1961562#c16 # We're a node or DR host. We need to touch this file. my $work_around_file = "/etc/qemu/firmware/50-edk2-ovmf-cc.json"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { work_around_file => $work_around_file }}); @@ -1147,6 +1148,9 @@ sub handle_special_cases group => "root", }); } + + # Make sure DRBD compiled after a kernel upgrade. + $anvil->DRBD->_initialize_kmod({debug => 2}); } return(0);