Updated ocf:alteeve:server to better handle multi-peer DRBD configurations.

Cleaned up some logging in DRBD->get_status.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 3 years ago
parent e62e5d7b0c
commit bc39c3fe5c
  1. 11
      Anvil/Tools/DRBD.pm
  2. 98
      ocf/alteeve/server
  3. 4
      share/words.xml

@ -1648,7 +1648,7 @@ sub get_status
($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call}); ($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output, output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{return_code}, "drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}}); }});
} }
else else
@ -1666,7 +1666,7 @@ sub get_status
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
error => $error, error => $error,
output => $output, output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{return_code}, "drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}}); }});
} }
@ -1913,12 +1913,14 @@ sub manage_resource
### can block startup, so to be safe, during start, we'll call adjust ### can block startup, so to be safe, during start, we'll call adjust
if ($task eq "up") if ($task eq "up")
{ {
# This generally brings up the resource
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource;
my $output = ""; my $output = "";
my $return_code = 255; my $return_code = 255;
if ($anvil->Network->is_local({host => $target})) if ($anvil->Network->is_local({host => $target}))
{ {
# Local. # Local.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output, output => $output,
@ -1928,6 +1930,7 @@ sub manage_resource
else else
{ {
# Remote call. # Remote call.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, my $error, $return_code) = $anvil->Remote->call({ ($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug, debug => $debug,
shell_call => $shell_call, shell_call => $shell_call,
@ -1944,12 +1947,15 @@ sub manage_resource
} }
} }
# If we 'adjust'ed above, this will likely complain that the backing disk already exists, and that's
# fine.
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource;
my $output = ""; my $output = "";
my $return_code = 255; my $return_code = 255;
if ($anvil->Network->is_local({host => $target})) if ($anvil->Network->is_local({host => $target}))
{ {
# Local. # Local.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output, output => $output,
@ -1959,6 +1965,7 @@ sub manage_resource
else else
{ {
# Remote call. # Remote call.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, my $error, $return_code) = $anvil->Remote->call({ ($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug, debug => $debug,
shell_call => $shell_call, shell_call => $shell_call,

@ -792,14 +792,11 @@ sub start_drbd_resource
my $local_host = $anvil->Get->short_host_name(); my $local_host = $anvil->Get->short_host_name();
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server = $anvil->data->{environment}{OCF_RESKEY_name};
my $host = $anvil->Get->short_host_name; my $host = $anvil->Get->short_host_name;
my $peer = $anvil->data->{drbd}{config}{$host}{peer};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server => $server, server => $server,
host => $host, host => $host,
peer => $peer,
}}); }});
### TODO: Local start up below, move the peer check to have local startup is handled
# Do we need startup? # Do we need startup?
my $local_startup_needed = 0; my $local_startup_needed = 0;
$anvil->DRBD->get_status({debug => 3}); $anvil->DRBD->get_status({debug => 3});
@ -827,76 +824,65 @@ sub start_drbd_resource
} }
} }
# Do I need to start the DRBD resource locally? If so, do so.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_startup_needed => $local_startup_needed }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_startup_needed => $local_startup_needed }});
if ($local_startup_needed) if ($local_startup_needed)
{ {
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}}) foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{ {
my $peer_ip = $anvil->data->{drbd}{config}{$host}{resource}{$resource}{connection}{$peer}{ip_address};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0419", variables => {
server => $server,
peer => $peer,
peer_ip => $peer_ip,
resource => $resource,
}});
# Bring the local resource up # Bring the local resource up
$anvil->DRBD->manage_resource({ $anvil->DRBD->manage_resource({
debug => 2,
resource => $resource, resource => $resource,
task => "up", task => "up",
}); });
# Bring the peer's resource up. # Now wait for it to come up.
$anvil->DRBD->manage_resource({ my $waiting = 1;
resource => $resource, my $wait_until = time + 10;
task => "up", $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
target => $peer_ip, 's1:time' => time,
}); 's2:wait_until' => $wait_until,
}});
# Now wait for it to be connected or UpToDate...
my $waiting = 1;
while($waiting) while($waiting)
{ {
$anvil->DRBD->get_status({debug => 3}); $anvil->DRBD->get_status({debug => 3});
print "==] ".$local_host." [==] ".$resource." [==] ".$peer." [==\n"; my $all_up = 1;
print Dumper $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$peer};
print "=========================================================\n";
my $connection_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$peer}{'connection-state'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { connection_state => $connection_state }});
my $all_ready = 1;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}}) foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}})
{ {
my $disk_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'}; my $disk_state = lc($anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'});
my $replication_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$peer}{volume}{$volume}{'replication-state'}; $disk_state = "" if not defined $disk_state;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
disk_state => $disk_state, 's1:resource' => $resource,
replication_state => $replication_state, 's2:volume' => $volume,
's3:disk_state' => $disk_state,
}}); }});
# If the peer isn't connected (directly or by being in Sync), or this volume if (($disk_state ne "inconsistent") &&
# isn't UpToDate, we need to keep waiting. ($disk_state ne "outdated") &&
if ((lc($disk_state) ne "uptodate") && ($replication_state !~ /^Sync/i) && (lc($connection_state) ne "connected")) ($disk_state ne "consistent") &&
($disk_state ne "uptodate"))
{ {
$all_ready = 0; $all_up = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_up => $all_up }});
} }
} }
die; if ($all_up)
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }});
if ($all_ready)
{ {
$waiting = 0; $waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
} }
if ($waiting) if ($waiting)
{ {
sleep 1; sleep 2;
}
elsif (time > $wait_until)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "warning_0138"});
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
} }
} }
} }
@ -920,7 +906,7 @@ sub start_drbd_resource
} }
# See if we're inconsistent and, if so, if we can connect our peers. # See if we're inconsistent and, if so, if we can connect our peers.
sleep 5; sleep 2;
$anvil->DRBD->get_status({debug => 3}); $anvil->DRBD->get_status({debug => 3});
my $peer_startup_needed = 0; my $peer_startup_needed = 0;
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}}) foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
@ -986,20 +972,16 @@ sub start_drbd_resource
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
if ($access) if ($access)
{ {
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $host,
shell_call => $anvil->data->{path}{exe}{drbdadm}." up ".$resource,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0695", variables => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0695", variables => {
return_code => $return_code, host => $host,
error => $error, resource => $resource,
output => $output,
}}); }});
$anvil->DRBD->manage_resource({
debug => 2,
resource => $resource,
task => "up",
target => $host,
});
} }
else else
{ {

@ -2087,7 +2087,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0692">The anvil-safe-stop job has completed and will now power off.</key> <key name="log_0692">The anvil-safe-stop job has completed and will now power off.</key>
<key name="log_0693">The anvil-configure-host tool is requesting a reboot.</key> <key name="log_0693">The anvil-configure-host tool is requesting a reboot.</key>
<key name="log_0694">The connection to: [#!variable!host!#] for the resource: [#!variable!resource!#] is in the connection state: [#!variable!connection_state!#]. Will try to connect to the peer and up the resource now.</key> <key name="log_0694">The connection to: [#!variable!host!#] for the resource: [#!variable!resource!#] is in the connection state: [#!variable!connection_state!#]. Will try to connect to the peer and up the resource now.</key>
<key name="log_0695">The request to start the resource had the return code: [#!variable!return_code!#]. Call output, if any, was: [#!variable!output!#]. Errors, if any, were: [#!variable!error!#].</key> <key name="log_0695">About to request the start of the resource: [#!variable!resource!#] on: [#!variable!host!#].</key>
<key name="log_0696">The peer: [#!variable!peer!#] is defined in the resource: [#!variable!resource!#] but we don't connect to it, ignoring it.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -3121,6 +3122,7 @@ We will sleep a bit and try again.
<key name="warning_0135">[ Warning ] - The program: [#!variable!program!#] was not found to be running.</key> <key name="warning_0135">[ Warning ] - The program: [#!variable!program!#] was not found to be running.</key>
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key> <key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key> <key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. --> <!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list' to find new entries. --> <!-- Run 'striker-parse-os-list' to find new entries. -->

Loading…
Cancel
Save