Updated scan-cluster to verify that the migrate_to and migrate_from operations are given a timeout of 600s and an on-fail of "block", updating the server's pacemaker configuration when they are not. Updated Cluster->add_server() to set migrate_from to timeout=600s and on-fail=block as well.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 2 years ago
parent ef84e63a7a
commit b7abc481e6
  1. 5
      Anvil/Tools/Cluster.pm
  2. 46
      scancore-agents/scan-cluster/scan-cluster
  3. 5
      share/words.xml

@ -236,9 +236,12 @@ sub add_server
### NOTE: 'INFINITY' doesn't work in some cases, so we set 1 day timeouts. If Windows can't install
### an OS update in 24 hours, there are probably deeper issues.
###
### NOTE: If you update this command, check that scan-cluster->check_resources() is also updated!
###
### TODO: If the target_role is 'started' because the server was running, we may need to later do an
### update to set it to 'stopped' after we've verified it's in the cluster below.
my $resource_command = $anvil->data->{path}{exe}{pcs}." resource create ".$server_name." ocf:alteeve:server name=\"".$server_name."\" meta allow-migrate=\"true\" target-role=\"".$target_role."\" op monitor interval=\"60\" start timeout=\"60\" on-fail=\"block\" stop timeout=\"300\" on-fail=\"block\" migrate_to timeout=\"600\" on-fail=\"block\"";
my $resource_command = $anvil->data->{path}{exe}{pcs}." resource create ".$server_name." ocf:alteeve:server name=\"".$server_name."\" meta allow-migrate=\"true\" target-role=\"".$target_role."\" op monitor interval=\"60\" start timeout=\"60\" on-fail=\"block\" stop timeout=\"300\" on-fail=\"block\" migrate_to timeout=\"600\" on-fail=\"block\" migrate_from timeout=\"600\" on-fail=\"block\"";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource_command => $resource_command }});
my ($output, $return_code) = $anvil->System->call({shell_call => $resource_command});

@ -99,7 +99,7 @@ check_config($anvil);
# Check the fence delay
check_fence_delay($anvil);
# Check for failed resources
# Check for failed resources or resources that need updates
check_resources($anvil);
# Shut down.
@ -114,7 +114,10 @@ $anvil->ScanCore->agent_shutdown({agent => $THIS_FILE});
sub check_resources
{
my ($anvil) = @_;
my ($problem) = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{crm_mon}{parsed}{'pacemaker-result'}{resources}{resource}})
{
# This is used for alerts, if needed below.
@ -269,6 +272,45 @@ sub check_resources
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "scan_cluster_alert_0015", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0015", variables => $variables, set_by => $THIS_FILE});
}
# Check to see if server's parameters need updating
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server => $server }});
my $update = 0;
foreach my $op (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server}{operations}{op}})
{
my $name = $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server}{operations}{op}{$op}{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { name => $name }});
if (($name eq "migrate_to") or ($name eq "migrate_from"))
{
my $on_fail = exists $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server}{operations}{op}{$op}{'on-fail'} ? $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server}{operations}{op}{$op}{'on-fail'} : "";
my $timeout = exists $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server}{operations}{op}{$op}{timeout} ? $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server}{operations}{op}{$op}{timeout} : 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
on_fail => $on_fail,
timeout => $timeout,
}});
if (($on_fail ne "block") or ($timeout != 600))
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
}
}
}
if ($update)
{
my $resource_command = $anvil->data->{path}{exe}{pcs}." resource update ".$server." ocf:alteeve:server name=\"".$server."\" meta allow-migrate=\"true\" op monitor interval=\"60\" start timeout=\"60\" on-fail=\"block\" stop timeout=\"300\" on-fail=\"block\" migrate_to timeout=\"600\" on-fail=\"block\" migrate_from timeout=\"600\" on-fail=\"block\"";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0739", variables => {
server => $server,
command => $resource_command,
}});
my ($output, $return_code) = $anvil->System->call({shell_call => $resource_command});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
}
}

@ -972,8 +972,8 @@ resource #!variable!server!# {
<key name="header_0057">Link State</key>
<key name="header_0058">Duplex</key>
<key name="header_0059">Link Drops</key>
<key name="header_0060"><![CDATA[-=[ Bond Status - #!variable!date!# ] =-]]></key>
<key name="header_0061"><![CDATA[-=[ Ctrl + C to exit ] =-]]></key>
<key name="header_0060"><![CDATA[-=[ Bond Status - #!variable!date!# ]=-]]></key>
<key name="header_0061"><![CDATA[-=[ Ctrl + C to exit ]=-]]></key>
<key name="header_0062">Table</key>
<key name="header_0063">public</key> <!-- SQL schema -->
<key name="header_0064">history</key> <!-- SQL schema -->
@ -2389,6 +2389,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0736">The DR host: [#!variable!host!#] was not linked to the Anvil! node: [#!variable!anvil!#], nothing to do.</key>
<key name="log_0737">The job: [#!variable!command!#] (with job UUID: [#!variable!job_uuid!#]) is being skipped for now, already started a job (started job_uuid: [#!variable!started_job!#]) with this command on this loop.</key>
<key name="log_0738">There are no databases available at this time.</key>
<key name="log_0739">The server: [#!variable!server!#] needs its pacemaker configuration updated. Running: [#!variable!command!#].</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

Loading…
Cancel
Save