Merge pull request #210 from ClusterLabs/anvil-tools-dev

Anvil tools dev
main
digimer-bot 3 years ago committed by GitHub
commit 7401c7f879
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      Anvil/Tools/Convert.pm
  2. 11
      Anvil/Tools/Database.pm
  3. 21
      Anvil/Tools/System.pm
  4. 2
      scancore-agents/scan-cluster/scan-cluster
  5. 2
      share/words.xml
  6. 16
      tools/anvil-boot-server
  7. 6
      tools/anvil-check-memory
  8. 13
      tools/anvil-daemon
  9. 45
      tools/anvil-safe-start

@ -848,6 +848,12 @@ sub format_mmddyy_to_yymmdd
date => $date,
}});
# Sometimes we're passed '--' which is not strictly an error, so we'll return it back.
if ($date eq "--")
{
return($date);
}
if (not $date)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Convert->format_mmddyy_to_yymmdd()", parameter => "host_name" }});
@ -905,7 +911,6 @@ sub host_name_to_ip
}
### TODO: Check local cached information later.
# Try to resolve it using 'gethostip'.
my $shell_call = $anvil->data->{path}{exe}{gethostip}." -d ".$host_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});

@ -952,9 +952,15 @@ sub configure_pgsql
}
# Start or restart the daemon?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
's1:running' => $running,
's2:update_postgresql_file' => $update_postgresql_file,
's3:update_pg_hba_file' => $update_pg_hba_file,
}});
if (not $running)
{
# Did we initialize?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { initialized => $initialized }});
if ($initialized)
{
# Start the daemon.
@ -991,6 +997,11 @@ sub configure_pgsql
}
# Do user and DB checks only if we're made a change above.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
's1:initialized' => $initialized,
's2:update_postgresql_file' => $update_postgresql_file,
's3:update_pg_hba_file' => $update_pg_hba_file,
}});
if (($initialized) or ($update_postgresql_file) or ($update_pg_hba_file))
{
# Create the .pgpass file, if needed.

@ -1770,6 +1770,16 @@ LIMIT 1
password_length => $password_length,
}});
# If the password has spaces, some IPMI BMCs won't allow them. If we need to use it, we'll take out
# the spaces and shrink the length.
my $ipmi_no_space_password = "";
if ($ipmi_password =~ /\s/)
{
$ipmi_no_space_password = $ipmi_password;
$ipmi_no_space_password =~ s/\s//g;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { ipmi_no_space_password => $ipmi_no_space_password }});
}
my $subnet_mask = "";
my $gateway = "";
my $in_network = "";
@ -2075,6 +2085,7 @@ LIMIT 1
my $wait_until = time + 120;
while ($waiting)
{
my $debug = 2;
my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{ipmitool}." user list ".$lan_channel});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output,
@ -2118,6 +2129,8 @@ LIMIT 1
}
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { user_name => $user_name }});
last if $user_name;
# Try again later or give up?
if (time > $wait_until)
@ -2137,6 +2150,7 @@ LIMIT 1
sleep 10;
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { user_name => $user_name }});
if (not $user_name)
{
# Failed to find a user.
@ -2225,6 +2239,13 @@ LIMIT 1
}
else
{
# If we used the no-space password, set it as the ipmi_password now.
if ($ipmi_no_space_password)
{
$ipmi_password = $ipmi_no_space_password;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { ipmi_password => $ipmi_password }});
}
# Change the password and then try again.
my $escaped_ipmi_password = shell_quote($ipmi_password);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { escaped_ipmi_password => $escaped_ipmi_password }});

@ -214,7 +214,7 @@ sub check_fence_delay
}});
if ((not $local_server_count) && (not $peer_server_count))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0636"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0636"});
return(0);
}
elsif (($local_server_count) && ($peer_server_count))

@ -2072,6 +2072,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0678">[ Note ] - We need to build the DRBD kernel module. This can take a few minutes, please be patient! Use 'journalctl -f' to monitor the build process.</key>
<key name="log_0679">Successfully built and installed the new DRBD kernel module!</key>
<key name="log_0680">We were asked to resync the database, but this host is hosting: [#!variable!count!#] server(s). Resync is not allowed when servers are running to reduce the risk the kernel's out of memory handler shooting a VM if the resync consumes too much RAM. You can see which servers are running with 'virsh list' and look for servers whose states are "running", "paused", "in shutdown" or "pmsuspended".</key>
<key name="log_0681">Testing that our short host name resolves to one of our IP prior to starting the cluster.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -3101,6 +3102,7 @@ We will sleep a bit and try again.
<key name="warning_0131">[ Warning ] - The postgresql server is not installed yet. Sleeping for a bit, then will check again.</key>
<key name="warning_0132">[ Warning ] - Failed to build or install the DRBD kernel module! It is very unlikely that this machine will be able to run any servers until this is fixed.</key>
<key name="warning_0133">[ Warning ] - Table: [history.#!variable!table!#] not found.</key>
<key name="warning_0134">[ Warning ] - Holding off starting the cluster. Tested access to ourself, and failed. Is '/etc/hosts' populated? Will try again in ten seconds.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list to find new entries. -->

@ -321,12 +321,16 @@ sub boot_all_servers
### TODO: Manage the boot order here.
# We top out at 90, bottom is 20.
my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}};
my $increment = int(70 / $server_count);
my $percent = 15;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server_count => $server_count,
increment => $increment,
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_count => $server_count }});
if (not $server_count)
{
# No servers exist yet.
return(0);
}
my $increment = int(70 / $server_count);
my $percent = 15;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { increment => $increment }});
foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
{
my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status};

@ -50,10 +50,10 @@ if (not $anvil->data->{switches}{program})
# Find the PID(s) of the program.
$anvil->data->{sys}{pids} = $anvil->System->pids({ignore_me => 1, program_name => $anvil->data->{switches}{program}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 'sys::pids' => $anvil->data->{sys}{pids} }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'sys::pids' => $anvil->data->{sys}{pids} }});
my $pids_found = @{$anvil->data->{sys}{pids}};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { pids_found => $pids_found }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pids_found => $pids_found }});
if (not $pids_found)
{
@ -66,7 +66,7 @@ if (not $pids_found)
foreach my $pid (sort {$a cmp $b} @{$anvil->data->{sys}{pids}})
{
my $smaps_path = "/proc/".$pid."/smaps";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { smaps_path => $smaps_path }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { smaps_path => $smaps_path }});
# This will store the amount of RAM used by this specific PID.
$anvil->data->{memory}{pid}{$pid} = 0;

@ -1256,17 +1256,22 @@ sub prep_database
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if ($host_type eq "striker")
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
prep_database => $prep_database,
"sys}{database}{connections" => $anvil->data->{sys}{database}{connections},
}});
if ($prep_database)
{
### NOTE: This failed once, in case / until it happens again, we'll force log level 2 and secure logging.
#my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{'striker-prep-database'}.$anvil->Log->switches, source => $THIS_FILE, line => __LINE__ });
my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure", source => $THIS_FILE, line => __LINE__ });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
my $shell_call = $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => , source => $THIS_FILE, line => __LINE__ });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
database_output => $database_output,
return_code => $return_code,
}});
}
else
elsif (not $anvil->data->{sys}{database}{connections})
{
# Start the daemon locally, if needed.
my $running = $anvil->System->check_daemon({daemon => "postgresql"});

@ -237,6 +237,51 @@ sub start_pacemaker
# Nope, start it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"});
# NOTE: In some odd cases, this can try to run before /etc/hosts has been populated. So wait
# until we can access ourself.
my $ok = 0;
until ($ok)
{
# Convert out short host name to an IP and verify that the IP is one of ours.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0681"});
my $local_bcn1_ip = $anvil->Convert->host_name_to_ip({debug => 2, host_name => $short_host_name});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_bcn1_ip => $local_bcn1_ip }});
if ($local_bcn1_ip)
{
# Is this one of our IPs, or is DNS being a little shit?
if (exists $anvil->data->{network}{$short_host_name})
{
delete $anvil->data->{network}{$short_host_name};
}
$anvil->Network->get_ips();
foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
{
next if $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip} eq "";
next if $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip} =~ /^127\.0\.0\./;
my $this_ip = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:interface' => $interface,
's2:this_ip' => $this_ip,
}});
if ($this_ip eq $local_bcn1_ip)
{
$ok = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ok => $ok }});
last;
}
}
}
if (not $ok)
{
# Sleep 10 seconds.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0134"});
sleep 10;
}
}
### TODO: A lot more testing is needed for degraded single-node start later.
### Should we use --all, or wait for our peer? For now, we wait.
#my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all";

Loading…
Cancel
Save