Merge pull request #220 from ClusterLabs/anvil-tools-dev

Further improved startup DRBD logic in ocf:alteeve:server. Specifical…
main
digimer-bot 3 years ago committed by GitHub
commit 8432edd4bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      cgi-bin/striker
  2. 94
      ocf/alteeve/server
  3. 2
      share/words.xml
  4. 6
      tools/anvil-configure-host
  5. 55
      tools/anvil-daemon
  6. 4
      tools/anvil-manage-power
  7. 4
      tools/anvil-safe-stop
  8. 2
      tools/anvil-update-system

@ -6461,7 +6461,7 @@ sub process_power
$say_description = "#!string!job_0008!#";
$say_reason = "log_0200";
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => $say_reason }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!".$say_reason."!#" }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,

@ -839,7 +839,7 @@ sub start_drbd_resource
# Now wait for it to come up.
my $waiting = 1;
my $wait_until = time + 10;
my $wait_until = time + 5;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:time' => time,
's2:wait_until' => $wait_until,
@ -876,7 +876,7 @@ sub start_drbd_resource
}
if ($waiting)
{
sleep 2;
sleep 1;
}
elsif (time > $wait_until)
{
@ -908,7 +908,7 @@ sub start_drbd_resource
# See if we're inconsistent and, if so, if we can connect our peers.
sleep 2;
$anvil->DRBD->get_status({debug => 3});
my $peer_startup_needed = 0;
my $peer_startup_needed = 1;
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{
# Is the current resource up locally already?
@ -930,7 +930,7 @@ sub start_drbd_resource
(not $disk_state))
{
# This will trigger trying to ssh into peer(s) and up'ing their resource.
$peer_startup_needed = 1;
$peer_startup_needed = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_startup_needed => $peer_startup_needed }});
last;
}
@ -994,40 +994,100 @@ sub start_drbd_resource
# Loop until all our resources are Connected or UpToDate
my $waiting = 1;
my $wait_until = time + 30;
my $wait_until = time + 5;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:time' => time,
's2:wait_until' => $wait_until,
}});
while($waiting)
{
sleep 5;
my $all_connected = 1;
sleep 1;
$anvil->DRBD->get_status({debug => 3});
my $all_resources_ok = 1;
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
{
foreach my $host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}})
# This is set to '1' is either the volumes are UpToDate or Sync'ing.
$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{ok} = 0;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host => $host }});
next if $anvil->Network->is_local({host => $host});
foreach my $connection (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}})
# This will be used to mark if a volume is being sync'ed later, if needed.
$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok} = 0;
my $disk_state = lc($anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'});
$disk_state = "" if not defined $disk_state;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { disk_state => $disk_state }});
if ($disk_state ne "uptodate")
{
$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"drbd::status::${local_host}::resource::${resource}::devices::volume::${volume}::ok" => $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok},
}});
}
}
if (not $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{ok})
{
# See if we're a SyncTarget
foreach my $connection (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}})
{
my $connection_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{'connection-state'};
my $connection_state = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{'connection-state'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
connection => $connection,
connection_state => $connection_state,
}});
if (lc($connection_state) ne "connected")
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$connection}{volume}})
{
$all_connected = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_connected => $all_connected }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"drbd::status::${local_host}::resource::${resource}::devices::volume::${volume}::ok" => $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok},
}});
next if $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok};
my $replication_state = lc($anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{connection}{$connection}{volume}{$volume}{'replication-state'});
$replication_state = "" if not defined $replication_state;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:volume' => $volume,
's2:replication_state' => $replication_state,
}});
if ($replication_state =~ /sync/)
{
# We're good to go.
$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"drbd::status::${local_host}::resource::${resource}::devices::volume::${volume}::ok" => $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok},
}});
}
}
}
}
# Loop through all volumes on all resources and see if they're OK. If they are, mark the resource as OK.
my $resource_ok = 1;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}})
{
my $volume_ok = $anvil->data->{drbd}{status}{$local_host}{resource}{$resource}{devices}{volume}{$volume}{ok};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
volume => $volume,
volume_ok => $volume_ok,
}});
if (not $volume_ok)
{
$resource_ok = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_ok => $resource_ok }});
}
}
if (not $resource_ok)
{
$all_resources_ok = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_resources_ok => $all_resources_ok }});
}
}
if ($all_connected)
if ($all_resources_ok)
{
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
@ -1308,7 +1368,7 @@ pmsuspended - The domain has been suspended by guest power management, e.g. ente
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0525", variables => { server_name => $server }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { loop => $loop }});
sleep 2;
sleep 1;
last;
}
elsif ($state eq "shut off")

@ -2087,7 +2087,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0692">The anvil-safe-stop job has completed and will now power off.</key>
<key name="log_0693">The anvil-configure-host tool is requesting a reboot.</key>
<key name="log_0694">The connection to: [#!variable!host!#] for the resource: [#!variable!resource!#] is in the connection state: [#!variable!connection_state!#]. Will try to connect to the peer and up the resource now.</key>
<key name="log_0695">About to request the start of the resource: [#variable!resource!#] on: [#!variable!host!#].</key>
<key name="log_0695">About to request the start of the resource: [#!variable!resource!#] on: [#!variable!host!#].</key>
<key name="log_0696">The peer: [#!variable!peer!#] is defined in the resource: [#!variable!resource!#] but we don't connect to it, ignoring it.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->

@ -113,7 +113,7 @@ sub do_reboot
my ($anvil) = @_;
# Mark that a reboot is needed, in case something kills us before we actually reboot.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "log_0693" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0693!#" }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
@ -1234,7 +1234,7 @@ sub reconfigure_network
# If we should reset, do so now.
if ($anvil->data->{sys}{reboot})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0687", variables => { reason => "log_0631" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0687", variables => { reason => "#!string!log_0631!#" }});
do_reboot($anvil);
}
@ -1242,7 +1242,7 @@ sub reconfigure_network
{
# In an attempt to make network changes more reliable, we'll just reboot. This shouldn't
# actually be hit anymore as any change should have triggered the reboot above.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0687", variables => { reason => "log_0631" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0687", variables => { reason => "#!string!log_0631!#" }});
do_reboot($anvil);
# # Re-read the config

@ -615,29 +615,40 @@ sub handle_periodic_tasks
}
elsif ($uuid eq $host_uuid)
{
# This is us, backup and shut down.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"});
# Switch the read_uuid and then close
$anvil->data->{sys}{database}{read_uuid} = $first_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }});
# Disconnect
$anvil->data->{cache}{database_handle}{$uuid}->disconnect;
delete $anvil->data->{cache}{database_handle}{$uuid};
# Create a backup, this is useful also for setting the mtime of the last time
# we were up.
my $dump_file = $anvil->Database->backup_database({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
# Stop the daemon
my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
if ($return_code eq "0")
### TODO: We need to have a way to tell clients to disconnect
### and then shutdown cleanly. This "Wait for an hour"
### is a kludge.
# This is us, Have we been up for at least an hour?
my $uptime = $anvil->Get->uptime();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
if ($uptime > 3600)
{
# Stopped the daemon.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"});
# backup and shut down.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"});
# Switch the read_uuid and then close
$anvil->data->{sys}{database}{read_uuid} = $first_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid},
}});
# Disconnect
$anvil->data->{cache}{database_handle}{$uuid}->disconnect;
delete $anvil->data->{cache}{database_handle}{$uuid};
# Create a backup, this is useful also for setting
# the mtime of the last time we were up.
my $dump_file = $anvil->Database->backup_database({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
# Stop the daemon
my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
if ($return_code eq "0")
{
# Stopped the daemon.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"});
}
}
}
}

@ -130,7 +130,7 @@ if ($anvil->data->{switches}{'reboot-needed'} eq "1")
# Enable
if (not $reboot_needed)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "log_0688" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0688!#" }});
$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
print $anvil->Words->string({key => "message_0048"})."\n";
@ -248,7 +248,7 @@ sub do_poweroff
# Make sure the 'reboot needed' flag is set. When 'anvil-daemon' starts, it will use this to confirm
# that it is starting post-reboot and clear it.
my $say_reason = $task eq "poweroff" ? "log_0689" : "log_0688";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => $say_reason }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!".$say_reason."!#" }});
$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});

@ -101,7 +101,7 @@ if ($anvil->data->{switches}{'job-uuid'})
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::power-off' => $anvil->data->{switches}{'power-off'},
}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "log_0691" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0691!#" }});
}
if ($line =~ /stop-reason=(.*?)$/)
{
@ -157,7 +157,7 @@ if ($anvil->data->{switches}{'power-off'})
host_status => "stopping",
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "log_0692" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0692!#" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0325"});
$anvil->Job->update_progress({progress => 100, message => "job_0325"});

@ -173,7 +173,7 @@ sub run_os_update
if ($line =~ /^kernel /)
{
# Reboot will be needed.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "log_0690" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { reboot_needed => $reboot_needed }});
}

Loading…
Cancel
Save