Added node 2 joining an Anvil! node if not started by node 1.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 8 months ago
parent e00dec7cba
commit 3e63b726d3
  1. 2
      share/words.xml
  2. 3
      tools/anvil-daemon
  3. 33
      tools/anvil-join-anvil

@ -1387,7 +1387,7 @@ Failure! The return code: [#!variable!return_code!#] was received ('0' was expec
<key name="job_0161">* Please enter a number between 1 and #!variable!max_cores!#.</key>
<key name="job_0162">-=] Available cores / threads: [#!variable!cores!# / #!variable!threads!#]</key>
<key name="job_0163"> - Node #!variable!core!# CPU Model: [#!variable!model!#]</key>
<key name="job_0164">#!free!#</key>
<key name="job_0164">Node 1 hasn't started the cluster yet. We might be rejoining a cluster, so we'll try joining now.</key>
<key name="job_0165">RAM: ........... [#!variable!ram!#]</key>
<key name="job_0166">* Please enter a valid amount up to: [#!variable!ram_total!# / #!variable!ram_available!#].</key>
<key name="job_0167">-=] Available RAM: [#!variable!ram_available!#]

@ -1832,11 +1832,10 @@ sub run_jobs
started_job_uuid => $started_job_uuid,
"jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress},
}});
die if not $job_uuid;
if ($started_job_uuid eq $job_uuid)
{
# We're restarting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0742", variables => {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => {
command => $job_command,
job_uuid => $job_uuid,
}});

@ -951,6 +951,12 @@ sub configure_pacemaker
# We loop until the peer finishes or the peer's job hits 100.
my $tried_starting = 0;
my $both_online = 0;
my $try_starting = time + 30;
my $delay = 5;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
delay => $delay,
try_starting => $try_starting,
}});
until($both_online)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
@ -1022,10 +1028,29 @@ sub configure_pacemaker
}
if (not $both_online)
{
### TODO: Left off here... In case of rebuilding, we'll never get past this, so we need to try 'pcs cluster start' once.
my $delay = 5;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0816", variables => { delay => $delay }});
sleep $delay;
if (time > $try_starting)
{
# Try starting pacemaker, in case we're rebuilding.
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0164");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0164"});
$try_starting += 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { try_starting => $try_starting }});
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
else
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0816", variables => { delay => $delay }});
sleep $delay;
}
}
}
}

Loading…
Cancel
Save