diff --git a/Anvil/Tools/Convert.pm b/Anvil/Tools/Convert.pm index 28c50483..dc1f3538 100644 --- a/Anvil/Tools/Convert.pm +++ b/Anvil/Tools/Convert.pm @@ -848,6 +848,12 @@ sub format_mmddyy_to_yymmdd date => $date, }}); + # Sometimes we're passed '--' which is not strictly an error, so we'll return it back. + if ($date eq "--") + { + return($date); + } + if (not $date) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Convert->format_mmddyy_to_yymmdd()", parameter => "host_name" }}); @@ -905,7 +911,6 @@ sub host_name_to_ip } ### TODO: Check local cached information later. - # Try to resolve it using 'gethostip'. my $shell_call = $anvil->data->{path}{exe}{gethostip}." -d ".$host_name; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index d962a618..69cc241a 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -952,9 +952,15 @@ sub configure_pgsql } # Start or restart the daemon? + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { + 's1:running' => $running, + 's2:update_postgresql_file' => $update_postgresql_file, + 's3:update_pg_hba_file' => $update_pg_hba_file, + }}); if (not $running) { # Did we initialize? + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { initialized => $initialized }}); if ($initialized) { # Start the daemon. @@ -991,6 +997,11 @@ sub configure_pgsql } # Do user and DB checks only if we're made a change above. 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { + 's1:initialized' => $initialized, + 's2:update_postgresql_file' => $update_postgresql_file, + 's3:update_pg_hba_file' => $update_pg_hba_file, + }}); if (($initialized) or ($update_postgresql_file) or ($update_pg_hba_file)) { # Create the .pgpass file, if needed. diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index 23004517..d9258c55 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -1770,6 +1770,16 @@ LIMIT 1 password_length => $password_length, }}); + # If the password has spaces, some IPMI BMCs won't allow them. If we need to use it, we'll take out + # the spaces and shrink the length. + my $ipmi_no_space_password = ""; + if ($ipmi_password =~ /\s/) + { + $ipmi_no_space_password = $ipmi_password; + $ipmi_no_space_password =~ s/\s//g; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { ipmi_no_space_password => $ipmi_no_space_password }}); + } + my $subnet_mask = ""; my $gateway = ""; my $in_network = ""; @@ -2075,6 +2085,7 @@ LIMIT 1 my $wait_until = time + 120; while ($waiting) { + my $debug = 2; my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{ipmitool}." user list ".$lan_channel}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { output => $output, @@ -2118,6 +2129,8 @@ LIMIT 1 } } } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { user_name => $user_name }}); + last if $user_name; # Try again later or give up? if (time > $wait_until) @@ -2137,6 +2150,7 @@ LIMIT 1 sleep 10; } } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { user_name => $user_name }}); if (not $user_name) { # Failed to find a user. @@ -2225,6 +2239,13 @@ LIMIT 1 } else { + # If we used the no-space password, set it as the ipmi_password now. 
+ if ($ipmi_no_space_password) + { + $ipmi_password = $ipmi_no_space_password; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { ipmi_password => $ipmi_password }}); + } + # Change the password and then try again. my $escaped_ipmi_password = shell_quote($ipmi_password); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { escaped_ipmi_password => $escaped_ipmi_password }}); diff --git a/scancore-agents/scan-cluster/scan-cluster b/scancore-agents/scan-cluster/scan-cluster index 6bf45ce9..9e13d4d6 100755 --- a/scancore-agents/scan-cluster/scan-cluster +++ b/scancore-agents/scan-cluster/scan-cluster @@ -214,7 +214,7 @@ sub check_fence_delay }}); if ((not $local_server_count) && (not $peer_server_count)) { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0636"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0636"}); return(0); } elsif (($local_server_count) && ($peer_server_count)) diff --git a/share/words.xml b/share/words.xml index bec84e10..9ab7d7d2 100644 --- a/share/words.xml +++ b/share/words.xml @@ -2072,6 +2072,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: [ Note ] - We need to build the DRBD kernel module. This can take a few minutes, please be patient! Use 'journalctl -f' to monitor the build process. Successfully built and installed the new DRBD kernel module! We were asked to resync the database, but this host is hosting: [#!variable!count!#] server(s). Resync is not allowed when servers are running to reduce the risk the kernel's out of memory handler shooting a VM if the resync consumes too much RAM. You can see which servers are running with 'virsh list' and look for servers whose states are "running", "paused", "in shutdown" or "pmsuspended". + Testing that our short host name resolves to one of our IP prior to starting the cluster. 
The host name: [#!variable!target!#] does not resolve to an IP address. @@ -3101,6 +3102,7 @@ We will sleep a bit and try again. [ Warning ] - The postgresql server is not installed yet. Sleeping for a bit, then will check again. [ Warning ] - Failed to build or install the DRBD kernel module! It is very unlikely that this machine will be able to run any servers until this is fixed. [ Warning ] - Table: [history.#!variable!table!#] not found. + [ Warning ] - Holding off starting the cluster. Tested access to ourself, and failed. Is '/etc/hosts' populated? Will try again in ten seconds. diff --git a/tools/anvil-boot-server b/tools/anvil-boot-server index 134c3fb8..90ee9b19 100755 --- a/tools/anvil-boot-server +++ b/tools/anvil-boot-server @@ -321,12 +321,16 @@ sub boot_all_servers ### TODO: Manage the boot order here. # We top out at 90, bottom is 20. my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}}; - my $increment = int(70 / $server_count); - my $percent = 15; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - server_count => $server_count, - increment => $increment, - }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_count => $server_count }}); + if (not $server_count) + { + # No servers exist yet. + return(0); + } + + my $increment = int(70 / $server_count); + my $percent = 15; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { increment => $increment }}); foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}}) { my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; diff --git a/tools/anvil-check-memory b/tools/anvil-check-memory index ecce13b0..b6fc0ef6 100755 --- a/tools/anvil-check-memory +++ b/tools/anvil-check-memory @@ -50,10 +50,10 @@ if (not $anvil->data->{switches}{program}) # Find the PID(s) of the program. 
$anvil->data->{sys}{pids} = $anvil->System->pids({ignore_me => 1, program_name => $anvil->data->{switches}{program}}); -$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 'sys::pids' => $anvil->data->{sys}{pids} }}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'sys::pids' => $anvil->data->{sys}{pids} }}); my $pids_found = @{$anvil->data->{sys}{pids}}; -$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { pids_found => $pids_found }}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pids_found => $pids_found }}); if (not $pids_found) { @@ -66,7 +66,7 @@ if (not $pids_found) foreach my $pid (sort {$a cmp $b} @{$anvil->data->{sys}{pids}}) { my $smaps_path = "/proc/".$pid."/smaps"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { smaps_path => $smaps_path }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { smaps_path => $smaps_path }}); # This will store the amount of RAM used by this specific PID. $anvil->data->{memory}{pid}{$pid} = 0; diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 72ac9d04..e3f75f0f 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -1256,17 +1256,22 @@ sub prep_database $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }}); if ($host_type eq "striker") { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + prep_database => $prep_database, + 'sys::database::connections' => $anvil->data->{sys}{database}{connections}, + }}); if ($prep_database) { ### NOTE: This failed once, in case / until it happens again, we'll force log level 2 and secure logging. 
- #my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{'striker-prep-database'}.$anvil->Log->switches, source => $THIS_FILE, line => __LINE__ }); - my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure", source => $THIS_FILE, line => __LINE__ }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + my $shell_call = $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__ }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { database_output => $database_output, return_code => $return_code, }}); } - else + elsif (not $anvil->data->{sys}{database}{connections}) { # Start the daemon locally, if needed. my $running = $anvil->System->check_daemon({daemon => "postgresql"}); diff --git a/tools/anvil-safe-start b/tools/anvil-safe-start index e88672f0..cb4f50d2 100755 --- a/tools/anvil-safe-start +++ b/tools/anvil-safe-start @@ -237,6 +237,51 @@ sub start_pacemaker # Nope, start it. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"}); + # NOTE: In some odd cases, this can try to run before /etc/hosts has been populated. So wait + # until we can access ourself. + my $ok = 0; + until ($ok) + { + # Convert our short host name to an IP and verify that the IP is one of ours. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0681"}); + my $local_bcn1_ip = $anvil->Convert->host_name_to_ip({debug => 2, host_name => $short_host_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_bcn1_ip => $local_bcn1_ip }}); + + if ($local_bcn1_ip) + { + # Is this one of our IPs, or is DNS being a little shit? + if (exists $anvil->data->{network}{$short_host_name}) + { + delete $anvil->data->{network}{$short_host_name}; + } + $anvil->Network->get_ips(); + + foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}}) + { + next if $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip} eq ""; + next if $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip} =~ /^127\.0\.0\./; + my $this_ip = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:interface' => $interface, + 's2:this_ip' => $this_ip, + }}); + if ($this_ip eq $local_bcn1_ip) + { + $ok = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ok => $ok }}); + last; + } + } + } + + if (not $ok) + { + # Sleep 10 seconds. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0134"}); + sleep 10; + } + } + ### TODO: A lot more testing is needed for degraded single-node start later. ### Should we use --all, or wait for our peer? For now, we wait. #my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all";