Merge pull request #219 from ClusterLabs/anvil-tools-dev

Anvil tools dev
main
Digimer 3 years ago committed by GitHub
commit 9b54813fe3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      Anvil/Tools/DRBD.pm
  2. 6
      notes
  3. 274
      ocf/alteeve/server
  4. 7
      share/words.xml
  5. 5
      tools/anvil-configure-host

@ -1648,7 +1648,7 @@ sub get_status
($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call}); ($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output, output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{return_code}, "drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}}); }});
} }
else else
@ -1666,7 +1666,7 @@ sub get_status
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
error => $error, error => $error,
output => $output, output => $output,
"drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{return_code}, "drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
}}); }});
} }
@ -1913,12 +1913,14 @@ sub manage_resource
### can block startup, so to be safe, during start, we'll call adjust ### can block startup, so to be safe, during start, we'll call adjust
if ($task eq "up") if ($task eq "up")
{ {
# This generally brings up the resource
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$resource;
my $output = ""; my $output = "";
my $return_code = 255; my $return_code = 255;
if ($anvil->Network->is_local({host => $target})) if ($anvil->Network->is_local({host => $target}))
{ {
# Local. # Local.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output, output => $output,
@ -1928,6 +1930,7 @@ sub manage_resource
else else
{ {
# Remote call. # Remote call.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, my $error, $return_code) = $anvil->Remote->call({ ($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug, debug => $debug,
shell_call => $shell_call, shell_call => $shell_call,
@ -1944,12 +1947,15 @@ sub manage_resource
} }
} }
# If we 'adjust'ed abovem this will likely complain that the backing disk already exists, and that's
# fine.
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource;
my $output = ""; my $output = "";
my $return_code = 255; my $return_code = 255;
if ($anvil->Network->is_local({host => $target})) if ($anvil->Network->is_local({host => $target}))
{ {
# Local. # Local.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
output => $output, output => $output,
@ -1959,6 +1965,7 @@ sub manage_resource
else else
{ {
# Remote call. # Remote call.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
($output, my $error, $return_code) = $anvil->Remote->call({ ($output, my $error, $return_code) = $anvil->Remote->call({
debug => $debug, debug => $debug,
shell_call => $shell_call, shell_call => $shell_call,

@ -312,6 +312,12 @@ pcs constraint location srv01-test prefers el8-a01n01=200 el8-a01n02=100
stonith-max-attempts=INFINITY stonith-max-attempts=INFINITY
cluster-recheck-interval puts an upper bound on the "i give up" time cluster-recheck-interval puts an upper bound on the "i give up" time
====
pcs resource create srv01-cs8 ocf:alteeve:server name="srv01-cs8" meta allow-migrate="true" target-role="stopped" op monitor interval="60" start timeout="INFINITY" on-fail="block" stop timeout="INFINITY" on-fail="block" migrate_to timeout="INFINITY"
pcs constraint location srv01-cs8 prefers mk-a02n01=200 mk-a02n02=100
==== DRBD notes ==== DRBD notes
* resources can contain an US-ASCII character, except for spaces * resources can contain an US-ASCII character, except for spaces

@ -792,29 +792,27 @@ sub start_drbd_resource
my $local_host = $anvil->Get->short_host_name(); my $local_host = $anvil->Get->short_host_name();
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server = $anvil->data->{environment}{OCF_RESKEY_name};
my $host = $anvil->Get->short_host_name; my $host = $anvil->Get->short_host_name;
my $peer = $anvil->data->{drbd}{config}{$host}{peer};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server => $server, server => $server,
host => $host, host => $host,
peer => $peer,
}}); }});
# Do we need startup? # Do we need startup?
my $startup_needed = 0; my $local_startup_needed = 0;
$anvil->DRBD->get_status({debug => 3}); $anvil->DRBD->get_status({debug => 3});
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}}) foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{ {
# Is the current resource up locally already? If it is, we're done. # Is the current resource up locally already?
my $role = defined $anvil->data->{drbd}{status}{$host}{resource}{$resource}{role} ? $anvil->data->{drbd}{status}{$host}{resource}{$resource}{role} : ""; my $role = defined $anvil->data->{drbd}{status}{$host}{resource}{$resource}{role} ? $anvil->data->{drbd}{status}{$host}{resource}{$resource}{role} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
resource => $resource, 's1:resource' => $resource,
role => $role, 's2:role' => $role,
}}); }});
if ((lc($role) ne "secondary") && (lc($role) ne "primary")) if ((lc($role) ne "secondary") && (lc($role) ne "primary"))
{ {
$startup_needed = 1; $local_startup_needed = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup_needed => $startup_needed }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_startup_needed => $local_startup_needed }});
last; last;
} }
else else
@ -826,95 +824,219 @@ sub start_drbd_resource
} }
} }
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup_needed => $startup_needed }}); # Do I need to start the DRBD resource locally? If so, do so.
if (not $startup_needed) $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_startup_needed => $local_startup_needed }});
if ($local_startup_needed)
{ {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0431"}); foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
return(0); {
# Bring the local resource up
$anvil->DRBD->manage_resource({
debug => 2,
resource => $resource,
task => "up",
});
# Now wait for it to come up.
my $waiting = 1;
my $wait_until = time + 10;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:time' => time,
's2:wait_until' => $wait_until,
}});
while($waiting)
{
$anvil->DRBD->get_status({debug => 3});
my $all_up = 1;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}})
{
my $disk_state = lc($anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'});
$disk_state = "" if not defined $disk_state;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:resource' => $resource,
's2:volume' => $volume,
's3:disk_state' => $disk_state,
}});
if (($disk_state ne "inconsistent") &&
($disk_state ne "outdated") &&
($disk_state ne "consistent") &&
($disk_state ne "uptodate"))
{
$all_up = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_up => $all_up }});
}
}
if ($all_up)
{
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
if ($waiting)
{
sleep 2;
}
elsif (time > $wait_until)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "warning_0138"});
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
}
# If auto-promote isn't set, promote the resource.
if (not $anvil->data->{drbd}{config}{$local_host}{'auto-promote'})
{
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0420", variables => {
server => $server,
resource => $resource,
}});
# Make the local resource primary.
$anvil->DRBD->manage_resource({
resource => $resource,
task => "primary",
});
}
}
} }
# Start DRBD locally. # See if we're inconsistent and, if so, if we can connect our peers.
sleep 2;
$anvil->DRBD->get_status({debug => 3});
my $peer_startup_needed = 0;
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}}) foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{ {
my $peer_ip = $anvil->data->{drbd}{config}{$host}{resource}{$resource}{connection}{$peer}{ip_address}; # Is the current resource up locally already?
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0419", variables => { my $role = defined $anvil->data->{drbd}{status}{$host}{resource}{$resource}{role} ? $anvil->data->{drbd}{status}{$host}{resource}{$resource}{role} : "";
server => $server, $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peer => $peer, 's1:resource' => $resource,
peer_ip => $peer_ip, 's2:role' => $role,
resource => $resource,
}}); }});
# Bring the local resource up # Check all volumes.
$anvil->DRBD->manage_resource({ foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}})
resource => $resource, {
task => "up", my $disk_state = defined $anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'} ? $anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'} : "";
}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { disk_state => $disk_state }});
# Bring the peer's resource up.
$anvil->DRBD->manage_resource({
resource => $resource,
task => "up",
target => $peer_ip,
});
# Now wait for it to be connected or UpToDate... if ((lc($disk_state) eq "consistent") or
my $waiting = 1; (lc($disk_state) eq "outdated") or
while($waiting) (lc($disk_state) eq "failed") or
(not $disk_state))
{
# This will trigger trying to ssh into peer(s) and up'ing their resource.
$peer_startup_needed = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_startup_needed => $peer_startup_needed }});
last;
}
}
}
# Do we need to start the resource on our peers?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_startup_needed => $peer_startup_needed }});
if (not $peer_startup_needed)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0431"});
return(0);
}
# Start DRBD on the peer(s).
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
foreach my $host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}})
{ {
$anvil->DRBD->get_status({debug => 3}); my $is_local = $anvil->Network->is_local({host => $host});
my $connection_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer}{'connection-state'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
connection_state => $connection_state, 's1:host' => $host,
's2:is_local' => $is_local,
}}); }});
my $all_ready = 1; my $connection_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$host}{'connection-state'};
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}}) $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { connection_state => $connection_state }});
if (lc($connection_state) ne "connected")
{ {
my $disk_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'}; # Try to connect to the peer and up this reasource.
my $replication_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer}{volume}{$volume}{'replication-state'}; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0694", variables => {
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host => $host,
disk_state => $disk_state, resource => $resource,
replication_state => $replication_state, connection_state => $connection_state,
}}); }});
my ($access) = $anvil->Remote->test_access({target => $host});
# Is the peer isn't connected (directly or by being in Sync), or this volume $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
# isn't UpToDate, we need to keep waiting. if ($access)
if ((lc($disk_state) ne "uptodate") && ($replication_state !~ /^Sync/i) && (lc($connection_state) ne "connected"))
{ {
$all_ready = 0; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0695", variables => {
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }}); host => $host,
resource => $resource,
}});
$anvil->DRBD->manage_resource({
debug => 2,
resource => $resource,
task => "up",
target => $host,
});
}
else
{
# No access
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "warning_0136", variables => { host => $host }});
} }
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }});
if ($all_ready)
{
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
if ($waiting)
{
sleep 1;
} }
} }
} }
# If auto-promote isn't set, promote the resource. # Loop until all our resources are Connected or UpToDate
if (not $anvil->data->{drbd}{config}{$host}{'auto-promote'}) my $waiting = 1;
my $wait_until = time + 30;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:time' => time,
's2:wait_until' => $wait_until,
}});
while($waiting)
{ {
sleep 5;
my $all_connected = 1;
$anvil->DRBD->get_status({debug => 3});
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}}) foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{ {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0420", variables => { foreach my $host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}})
server => $server, {
resource => $resource, $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host => $host }});
}}); next if $anvil->Network->is_local({host => $host});
# Make the local resource primary. foreach my $connection (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}})
$anvil->DRBD->manage_resource({ {
resource => $resource, my $connection_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{'connection-state'};
task => "primary", $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
}); connection => $connection,
connection_state => $connection_state,
}});
if (lc($connection_state) ne "connected")
{
$all_connected = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_connected => $all_connected }});
}
}
}
}
if ($all_connected)
{
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
elsif (time > $wait_until)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "warning_0137"});
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
} }
} }

@ -2085,6 +2085,10 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0690">Kernel updated, reboot queued.</key> <key name="log_0690">Kernel updated, reboot queued.</key>
<key name="log_0691">Requested to power-off as part of the anvil-safe-stop job.</key> <key name="log_0691">Requested to power-off as part of the anvil-safe-stop job.</key>
<key name="log_0692">The anvil-safe-stop job has completed and will now power off.</key> <key name="log_0692">The anvil-safe-stop job has completed and will now power off.</key>
<key name="log_0693">The anvil-configure-host tool is requesting a reboot.</key>
<key name="log_0694">The connection to: [#!variable!host!#] for the resource: [#!variable!resource!#] is in the connection state: [#!variable!connection_state!#]. Will try to connect to the peer and up the resource now.</key>
<key name="log_0695">About to request the start of the resource: [#variable!resource!#] on: [#!variable!host!#].</key>
<key name="log_0696">The peer: [#!variable!peer!#] is defined in the resource: [#!variable!resource!#] but we don't connect to it, ignoring it.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -3116,6 +3120,9 @@ We will sleep a bit and try again.
<key name="warning_0133">[ Warning ] - Table: [history.#!variable!table!#] not found.</key> <key name="warning_0133">[ Warning ] - Table: [history.#!variable!table!#] not found.</key>
<key name="warning_0134">[ Warning ] - Holding off starting the cluster. Tested access to ourself, and failed. Is '/etc/hosts' populated? Will try again in ten seconds.</key> <key name="warning_0134">[ Warning ] - Holding off starting the cluster. Tested access to ourself, and failed. Is '/etc/hosts' populated? Will try again in ten seconds.</key>
<key name="warning_0135">[ Warning ] - The program: [#!variable!program!#] was not found to be running.</key> <key name="warning_0135">[ Warning ] - The program: [#!variable!program!#] was not found to be running.</key>
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. --> <!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list to find new entries. --> <!-- Run 'striker-parse-os-list to find new entries. -->

@ -113,6 +113,7 @@ sub do_reboot
my ($anvil) = @_; my ($anvil) = @_;
# Mark that a reboot is needed, in case something kills us before we actually reboot. # Mark that a reboot is needed, in case something kills us before we actually reboot.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "log_0693" }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE, file => $THIS_FILE,
line => __LINE__, line => __LINE__,
@ -1233,7 +1234,7 @@ sub reconfigure_network
# If we should reset, do so now. # If we should reset, do so now.
if ($anvil->data->{sys}{reboot}) if ($anvil->data->{sys}{reboot})
{ {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, 'print' => 1, key => "log_0631"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0687", variables => { reason => "log_0631" }});
do_reboot($anvil); do_reboot($anvil);
} }
@ -1241,7 +1242,7 @@ sub reconfigure_network
{ {
# In an attempt to make network changes more reliable, we'll just reboot. This shouldn't # In an attempt to make network changes more reliable, we'll just reboot. This shouldn't
# actually be hit anymore as any change should have triggered the reboot above. # actually be hit anymore as any change should have triggered the reboot above.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, 'print' => 1, key => "log_0631"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0687", variables => { reason => "log_0631" }});
do_reboot($anvil); do_reboot($anvil);
# # Re-read the config # # Re-read the config

Loading…
Cancel
Save