Updated ocf:alteeve:server to better handle multi-peer DRBD configurations.

Cleaned up some logging in DRBD->get_status.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 3 years ago
parent e62e5d7b0c
commit bc39c3fe5c
  1. 11
      Anvil/Tools/DRBD.pm
  2. 98
      ocf/alteeve/server
  3. 4
      share/words.xml

@ -1648,7 +1648,7 @@ sub get_status
($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{return_code},
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}});
}
else
@ -1666,7 +1666,7 @@ sub get_status
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
error => $error,
output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{return_code},
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}});
}
@ -1913,12 +1913,14 @@ sub manage_resource
### can block startup, so to be safe, during start, we'll call adjust
if ($task eq "up")
{
# This generally brings up the resource
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource;
my $output = "";
my $return_code = 255;
if ($anvil->Network->is_local({host => $target}))
{
# Local.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output,
@ -1928,6 +1930,7 @@ sub manage_resource
else
{
# Remote call.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug,
shell_call => $shell_call,
@ -1944,12 +1947,15 @@ sub manage_resource
}
}
# If we 'adjust'ed above, this will likely complain that the backing disk already exists, and that's
# fine.
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource;
my $output = "";
my $return_code = 255;
if ($anvil->Network->is_local({host => $target}))
{
# Local.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output,
@ -1959,6 +1965,7 @@ sub manage_resource
else
{
# Remote call.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug,
shell_call => $shell_call,

@ -792,14 +792,11 @@ sub start_drbd_resource
my $local_host = $anvil->Get->short_host_name();
my $server = $anvil->data->{environment}{OCF_RESKEY_name};
my $host = $anvil->Get->short_host_name;
my $peer = $anvil->data->{drbd}{config}{$host}{peer};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server => $server,
host => $host,
peer => $peer,
server => $server,
host => $host,
}});
### TODO: Local start up is below; move the peer check to after local startup is handled.
# Do we need startup?
my $local_startup_needed = 0;
$anvil->DRBD->get_status({debug => 3});
@ -827,76 +824,65 @@ sub start_drbd_resource
}
}
# Do I need to start the DRBD resource locally? If so, do so.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_startup_needed => $local_startup_needed }});
if ($local_startup_needed)
{
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{
my $peer_ip = $anvil->data->{drbd}{config}{$host}{resource}{$resource}{connection}{$peer}{ip_address};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0419", variables => {
server => $server,
peer => $peer,
peer_ip => $peer_ip,
resource => $resource,
}});
# Bring the local resource up
$anvil->DRBD->manage_resource({
debug => 2,
resource => $resource,
task => "up",
});
# Bring the peer's resource up.
$anvil->DRBD->manage_resource({
resource => $resource,
task => "up",
target => $peer_ip,
});
# Now wait for it to be connected or UpToDate...
my $waiting = 1;
# Now wait for it to come up.
my $waiting = 1;
my $wait_until = time + 10;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:time' => time,
's2:wait_until' => $wait_until,
}});
while($waiting)
{
$anvil->DRBD->get_status({debug => 3});
print "==] ".$local_host." [==] ".$resource." [==] ".$peer." [==\n";
print Dumper $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$peer};
print "=========================================================\n";
my $connection_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$peer}{'connection-state'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { connection_state => $connection_state }});
my $all_ready = 1;
my $all_up = 1;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}})
{
my $disk_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'};
my $replication_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$peer}{volume}{$volume}{'replication-state'};
my $disk_state = lc($anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'});
$disk_state = "" if not defined $disk_state;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
disk_state => $disk_state,
replication_state => $replication_state,
's1:resource' => $resource,
's2:volume' => $volume,
's3:disk_state' => $disk_state,
}});
# If the peer isn't connected (directly or by being in Sync), or this volume
# isn't UpToDate, we need to keep waiting.
if ((lc($disk_state) ne "uptodate") && ($replication_state !~ /^Sync/i) && (lc($connection_state) ne "connected"))
if (($disk_state ne "inconsistent") &&
($disk_state ne "outdated") &&
($disk_state ne "consistent") &&
($disk_state ne "uptodate"))
{
$all_ready = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }});
$all_up = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_up => $all_up }});
}
}
die;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }});
if ($all_ready)
if ($all_up)
{
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
if ($waiting)
{
sleep 1;
sleep 2;
}
elsif (time > $wait_until)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "warning_0138"});
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
}
@ -920,7 +906,7 @@ sub start_drbd_resource
}
# See if we're inconsistent and, if so, if we can connect our peers.
sleep 5;
sleep 2;
$anvil->DRBD->get_status({debug => 3});
my $peer_startup_needed = 0;
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
@ -986,20 +972,16 @@ sub start_drbd_resource
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
if ($access)
{
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $host,
shell_call => $anvil->data->{path}{exe}{drbdadm}." up ".$resource,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0695", variables => {
return_code => $return_code,
error => $error,
output => $output,
host => $host,
resource => $resource,
}});
$anvil->DRBD->manage_resource({
debug => 2,
resource => $resource,
task => "up",
target => $host,
});
}
else
{

@ -2087,7 +2087,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0692">The anvil-safe-stop job has completed and will now power off.</key>
<key name="log_0693">The anvil-configure-host tool is requesting a reboot.</key>
<key name="log_0694">The connection to: [#!variable!host!#] for the resource: [#!variable!resource!#] is in the connection state: [#!variable!connection_state!#]. Will try to connect to the peer and up the resource now.</key>
<key name="log_0695">The request to start the resource had the return code: [#!variable!return_code!#]. Call output, if any, was: [#!variable!output!#]. Errors, if any, were: [#!variable!error!#].</key>
<key name="log_0695">About to request the start of the resource: [#!variable!resource!#] on: [#!variable!host!#].</key>
<key name="log_0696">The peer: [#!variable!peer!#] is defined in the resource: [#!variable!resource!#] but we don't connect to it, ignoring it.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -3121,6 +3122,7 @@ We will sleep a bit and try again.
<key name="warning_0135">[ Warning ] - The program: [#!variable!program!#] was not found to be running.</key>
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list' to find new entries. -->

Loading…
Cancel
Save