Fixed a bug where cluster startup could hang in some cases.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 11 months ago
parent dd0175e05c
commit 023bcf46a4
  1. 54
      tools/anvil-join-anvil

@@ -495,20 +495,52 @@ sub configure_pacemaker
} }
if (time > $start_again) if (time > $start_again)
{ {
# Call cluster start again. ### NOTE: We can't just call 'start --all' again anymore. Now we need to
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0272"}); ### stop -> start. Before we do this, make sure there are no servers
$start_again = time + 60; ### running.
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all"; $start_again = time + 60;
my $restart = 1;
my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
start_again => $start_again, start_again => $start_again,
shell_call => $shell_call, server_count => $server_count,
}}); }});
foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"cib::parsed::data::server::${server}::active" => $anvil->data->{cib}{parsed}{data}{server}{$server}{active},
}});
if ($anvil->data->{cib}{parsed}{data}{server}{$server}{active})
{
$restart = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { restart => $restart }});
}
}
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { restart => $restart }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { if ($restart)
output => $output, {
return_code => $return_code, # Call cluster start again.
}}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0272"});
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster stop --all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
start_again => $start_again,
shell_call => $shell_call,
}});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
$shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all";
($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
} }
sleep 5 if not $both_online; sleep 5 if not $both_online;
} }

Loading…
Cancel
Save