Added a wait loop when forming node clusters.

* This adds a check where anvil-join-anvil waits until both subnodes are
  marked as configured and not in maintenance mode.
* Should address issue #479 (maybe, this shouldn't trigger reboots, but
  it was certainly a race condition found while investigating).

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 1 year ago
parent d58521ceca
commit 5d5270486e
  1. 2
      share/words.xml
  2. 3
      tools/anvil-configure-host
  3. 74
      tools/anvil-join-anvil

@ -1686,6 +1686,8 @@ Note: This is a permanent action! If you protect this server again later, a full
<key name="job_0472">Server Storage Management</key>
<key name="job_0473">This job manages the storage on a given hosted server. It can grow an existing disk, add a new disk, insert an ISO into an optical disc, or eject a disc.</key>
<key name="job_0474">The server: [#!variable!server!#] will now be forced off!</key>
<key name="job_0475">The subnode: [#!variable!subnode!#] is not ready. Configured: [#!variable!configured!#], maintenance mode: [#!variable!maintenance_mode!#].</key>
<key name="job_0476">Waiting for a bit, then will check again.</key>
<!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key>

@ -1686,7 +1686,8 @@ AND
if (-e $anvil->data->{path}{exe}{pcs})
{
# To make logs more sensible, we'll call 'problem' as 'out_of_cluster'.
my ($out_of_cluster) = $anvil->Cluster->parse_cib();
$anvil->data->{cib}{parsed}{'local'}{ready} = "" if not defined $anvil->data->{cib}{parsed}{'local'}{ready};
my ($out_of_cluster) = $anvil->Cluster->parse_cib();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
out_of_cluster => $out_of_cluster,
"cib::parsed::local::ready" => $anvil->data->{cib}{parsed}{'local'}{ready},

@ -182,10 +182,10 @@ sub configure_pacemaker
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $host_name = $anvil->Get->host_name;
my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
my $node1_host_uuid = $anvil->data->{sys}{node1_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name};
$node1_host_name =~ s/\..*$//;
my $node2_host_uuid = $anvil->data->{sys}{node2_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name};
$node2_host_name =~ s/\..*$//;
my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name;
@ -215,6 +215,9 @@ sub configure_pacemaker
return(0);
}
# Hold until both subnodes are marked as configured and not in maintenance mode.
wait_for_subnodes($anvil);
### Run on both nodes.
# Enable pcsd and start the pcsd daemon.
my ($return_code) = $anvil->System->enable_daemon({daemon => "pcsd.service"});
@ -2385,3 +2388,70 @@ sub update_progress
return(0);
}
sub wait_for_subnodes
{
my ($anvil) = @_;
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
anvil_uuid => $anvil_uuid,
node1_host_uuid => $node1_host_uuid,
node2_host_uuid => $node2_host_uuid,
}});
my $waiting = 1;
while($waiting)
{
my $waiting = 0;
foreach my $host_uuid ($node1_host_uuid, $node2_host_uuid)
{
my $host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
host_uuid => $host_uuid,
host_name => $host_name,
}});
my ($maintenance_mode, $variable_uuid, $modified_date) = $anvil->Database->read_variable({
variable_name => "maintenance_mode",
variable_source_table => "hosts",
variable_source_uuid => $host_uuid,
});
(my $configured, $variable_uuid, $modified_date) = $anvil->Database->read_variable({
variable_name => "system::configured",
variable_source_uuid => $host_uuid,
variable_source_table => "hosts",
});
$maintenance_mode = 1 if not defined $maintenance_mode;
$configured = 0 if not defined $configured;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
maintenance_mode => $maintenance_mode,
configured => $configured,
}});
if (($maintenance_mode) or (not $configured))
{
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting}});
update_progress($anvil, $anvil->data->{job}{progress}, "job_0475,!!subnode!".$host_name."!!,!!configured!".$configured."!!,!!maintenance_mode!".$maintenance_mode."!!");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0475", variables => {
subnode => $host_name,
configured => $configured,
maintenance_mode => $maintenance_mode,
}});
}
}
if ($waiting)
{
update_progress($anvil, $anvil->data->{job}{progress}, "job_0476");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0476"});
sleep 5;
}
}
return(0);
}

Loading…
Cancel
Save