This commit addresses (hopefully) issue #329.

* Updated DRBD->get_status() to attempt to rebuild the drbd kernel module when 'drbdsetup status' fails because the module is not found. If the call still fails, or if the returned JSON cannot be parsed, it now logs the error and returns gracefully.
* Updated ocf:alteeve:server to test access over a given IP before calling Server->find to avoid timeouts when the peer is down. Also updated it to set the constraints to keep the server on the new host when the old host returns to the cluster.
* Fixed a bug in scan-cluster where a server that is FAILED but not running was not being recovered; it is now properly recovered.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 2 years ago
parent 7cee742b67
commit 0e57836c8f
  1. 44
      Anvil/Tools/DRBD.pm
  2. 25
      ocf/alteeve/server
  3. 3
      scancore-agents/scan-cluster/scan-cluster
  4. 12
      share/words.xml

@ -2180,7 +2180,8 @@ sub get_status
my $shell_call = $anvil->data->{path}{exe}{drbdsetup}." status --json";
my $output = "";
my $host = $anvil->Get->short_host_name();
if ($anvil->Network->is_local({host => $target}))
my $is_local = $anvil->Network->is_local({host => $target});
if ($is_local)
{
# Local.
($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call});
@ -2214,9 +2215,48 @@ sub get_status
delete $anvil->data->{drbd}{status}{$host};
}
if ($output =~ /modprobe: FATAL: Module drbd not found/i)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_local => $is_local }});
if ($is_local)
{
# Try rebuilding the module.
my $problem = $anvil->DRBD->_initialize_kmod({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
if ($problem)
{
# Try again.
($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}});
if ($output =~ /modprobe: FATAL: Module drbd not found/i)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "error_0415", variables => {
output => $output,
return_code => $anvil->data->{drbd}{status}{$host}{return_code},
}});
return(1);
}
}
}
}
# Parse the output.
local $@;
my $json = JSON->new->allow_nonref;
my $drbd_status = $json->decode($output);
my $drbd_status = eval { $json->decode($output); };
if ($@)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "error_0416", variables => {
json => $drbd_status,
error => $@,
}});
return(1);
}
foreach my $hash_ref (@{$drbd_status})
{
my $resource = $hash_ref->{name};

@ -101,6 +101,8 @@ $| = 1;
# in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
### Read or Set the environment variables
# This is the name of the server we're managing. # Example values:
$anvil->data->{environment}{OCF_RESKEY_name} = defined $ENV{OCF_RESKEY_name} ? $ENV{OCF_RESKEY_name} : "";
@ -612,8 +614,21 @@ sub start_server
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }});
if ($success)
{
# Make sure server constraints favour us.
my $local_name = $anvil->data->{cib}{parsed}{'local'}{name};
my $peer_name = $anvil->data->{cib}{parsed}{peer}{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_name => $local_name,
peer_name => $peer_name,
}});
$anvil->Cluster->_set_server_constraint({
debug => 2,
server => $server,
preferred_node => $local_name,
});
# Success!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0309", variables => { server => $server }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0309", variables => { server => $server }});
$anvil->nice_exit({exit_code => 0});
}
else
@ -1021,12 +1036,20 @@ sub find_server
foreach my $ip_address (sort {$a cmp $b} keys %{$anvil->data->{drbd}{config}{$host}{ip_addresses}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ip_address => $ip_address }});
# Can we access this IP?
my $access = $anvil->Remote->test_access({target => $ip_address});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
if ($access)
{
$anvil->Server->find({
debug => 3,
target => $ip_address,
remote_user => "root",
});
}
}
foreach my $this_server (sort {$a cmp $b} keys %{$anvil->data->{server}{location}})
{

@ -194,7 +194,8 @@ sub check_resources
# The server was found to be running, but not here (or this node is not fully in the cluster). NOT attempting recovery yet.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "scan_cluster_log_0009"});
}
elsif ($attempt_recovery)
if ($attempt_recovery)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "scan_cluster_log_0010"});
$anvil->Cluster->recover_server({

@ -588,6 +588,18 @@ The definition data passed in was:
<key name="error_0412"><![CDATA[The server: [#!variable!server!#] is on the Anvil! node: [#!variable!on_anvil!#], and this is: [#!variable!this_anvil!#]. Exiting.]]></key>
<key name="error_0413"><![CDATA[You need to specify the server with '--server <name or uuid>'. Available servers are;]]></key>
<key name="error_0414"><![CDATA[The target host: [#!variable!target!#] was not found in the database.]]></key>
<key name="error_0415"><![CDATA[Failed to call the JSON formatted drbdsetup status. The output was: [#!variable!output!#] which exited with the return code: [#!variable!return_code!#].]]></key>
<key name="error_0416">[ Warning ] - Failed to parse the DRBD setup status JSON. The JSON read was:
========
#!variable!json!#
========
The error was:
========
#!variable!error!#
========
</key>
<!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which lines are translatable -->

Loading…
Cancel
Save