* Finished Cluster->check_server_constraints() and added it to scan-cluster. This now makes sure servers don't roll back to their old host after it has been fenced and recovers.

* Completely disabled Network->check_network(); it's causing more problems than it solves.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent 97d20ccd71
commit d3052c0229
  1. 101
      Anvil/Tools/Cluster.pm
  2. 3
      Anvil/Tools/Network.pm
  3. 4
      scancore-agents/scan-cluster/scan-cluster
  4. 5
      share/words.xml
  5. 2
      tools/anvil-daemon

@ -768,7 +768,108 @@ sub check_server_constraints
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->check_server_constraints()" }});
# Are we a node?
my $host_type = $anvil->Get->host_type({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_type => $host_type }});
if ($host_type ne "node")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0125"});
return("!!error!!");
}
# Are we in the cluster?
my $problem = $anvil->Cluster->parse_cib({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
if ($problem)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0126"});
return('!!error!!');
}
# Are we a full member?
if (not $anvil->data->{cib}{parsed}{'local'}{ready})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0127"});
return('!!error!!');
}
# Is our peer offline? If it's online, do nothing
if ($anvil->data->{cib}{parsed}{peer}{ready})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0640"});
return(0);
}
# Get the list of fence methods for my peer and I and make sure their configs are valid.
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid({debug => $debug});
my $anvil_name = $anvil->Get->anvil_name_from_uuid({debug => $debug, anvil_uuid => $anvil_uuid });
my $local_node_name = $anvil->data->{cib}{parsed}{'local'}{name};
my $local_host_uuid = $anvil->Get->host_uuid();
my $peer_node_name = $anvil->data->{cib}{parsed}{peer}{name};
my $peer_host_uuid = $anvil->data->{cib}{parsed}{peer}{host_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
anvil_uuid => $anvil_uuid,
anvil_name => $anvil_name,
local_node_name => $local_node_name,
local_host_uuid => $local_host_uuid,
peer_node_name => $peer_node_name,
peer_host_uuid => $peer_host_uuid,
}});
foreach my $id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{configuration}{constraints}{location}})
{
my $node_name = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{node};
my $resource = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{resource};
my $score = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{score};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
id => $id,
node_name => $node_name,
resource => $resource,
score => $score,
}});
$anvil->data->{location_constraint}{resource}{$resource}{node}{$node_name}{score} = $score;
$anvil->data->{location_constraint}{resource}{$resource}{node}{$node_name}{id} = $id;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"location_constraint::resource::${resource}::node::${node_name}::score" => $anvil->data->{location_constraint}{resource}{$resource}{node}{$node_name}{score},
"location_constraint::resource::${resource}::node::${node_name}::id" => $anvil->data->{location_constraint}{resource}{$resource}{node}{$node_name}{id},
}});
}
# Higher score == preferred
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{location_constraint}{resource}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource => $resource }});
my $high_score = 0;
my $preferred_node = "";
foreach my $node_name (sort {$a cmp $b} keys %{$anvil->data->{location_constraint}{resource}{$resource}{node}})
{
my $score = $anvil->data->{location_constraint}{resource}{$resource}{node}{$node_name}{score};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
node_name => $node_name,
score => $score,
}});
if ($score > $high_score)
{
$high_score = $score;
$preferred_node = $node_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
high_score => $high_score,
preferred_node => $preferred_node,
}});
}
if ($local_node_name ne $preferred_node)
{
# Make us the preferred node.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0641", variables => { server => $resource }});
$anvil->Cluster->_set_server_constraint({
server => $resource,
preferred_node => $local_node_name,
});
}
}
}
return(0);
}

@ -261,6 +261,9 @@ sub check_network
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Network->check_internet()" }});
# This is causing more problems than it solves. Disabled for the time being.
return(0);
my $heal = defined $parameter->{heal} ? $parameter->{heal} : "down_only";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
heal => $heal,

@ -106,6 +106,10 @@ check_config($anvil);
# Check the fence delay
check_fence_delay($anvil);
# Check that the location constraint is sensible.
$anvil->Cluster->check_server_constraints();
$anvil->nice_exit({exit_code => 0});
#############################################################################################################

@ -1833,6 +1833,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0637">We've got: [#!variable!local_server_count!#] servers, and the peer has: [#!variable!peer_server_count!#] servers. Skipping fence delay preference checks for now.</key>
<key name="log_0638">We're hosting servers, and our peer is not. Making the fence delay favour this node.</key>
<key name="log_0639">The Anvil! daemon is in startup mode, and the job: [#!variable!job_uuid!#], command: [#!variable!job_command!#] is not a startup job, ignoring it for now.</key>
<key name="log_0640">Our peer is online, no need to check server location constraints.</key>
<key name="log_0641">The server: [#!variable!server!#] has a location constraint that prefers our peer, but our peer is offline. Updating the location constraint to prefer this node.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -2817,6 +2819,9 @@ Read UUID: .... [#!variable!read_uuid!#]
<key name="warning_0122">[ Warning ] - Asked to find or set the fence delay, but this is not a node.</key>
<key name="warning_0123">[ Warning ] - Asked to find or set the fence delay, but node is not in a cluster.</key>
<key name="warning_0124">[ Warning ] - Asked to find or set the fence delay, but node is not fully in the cluster yet.</key>
<key name="warning_0125">[ Warning ] - Asked to check server location constraints, but this is not a node.</key>
<key name="warning_0126">[ Warning ] - Asked to check server location constraints, but this node is not in a cluster.</key>
<key name="warning_0127">[ Warning ] - Asked to check server location constraints, but this node is not fully in the cluster yet.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list' to find new entries. -->

@ -393,7 +393,7 @@ sub check_network
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
}
$anvil->Network->check_network({heal => "all"});
#$anvil->Network->check_network({heal => "all"});
$anvil->data->{sys}{network}{initial_checks} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {

Loading…
Cancel
Save