From 7b090e16239f334744ed7487728ccf1d35fb1778 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 15 Mar 2022 22:33:42 -0400 Subject: [PATCH] * Updated Database->shutdown() to disconnect, stop the postgresql daemon, then reconnect. * Updated anvil-daemon to not stop a database until both/all DB hosts are in both/all DB's hosts table. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 14 +++++- share/words.xml | 1 + tools/anvil-daemon | 105 +++++++++++++++++++++------------------- 3 files changed, 68 insertions(+), 52 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 046d64be..5bf1a5d9 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -15262,6 +15262,7 @@ sub mark_active my $pid = $$; my $state_name = "db_in_use::".$uuid."::".$pid; my $state_uuid = $anvil->Database->insert_or_update_states({ + debug => $debug, state_name => $state_name, state_host_uuid => $anvil->data->{sys}{host_uuid}, state_note => $value, @@ -16471,6 +16472,9 @@ sub shutdown } } + $host_uuid = $anvil->Get->host_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid }}); + # Delete all jobs on our local database, and then stop the DB $query = "DELETE FROM history.jobs; DELETE FROM jobs;"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0124", variables => { query => $query }}); @@ -16480,6 +16484,7 @@ sub shutdown my $pid = $$; my $state_name = "db_in_use::".$host_uuid."::".$pid; my $state_uuid = $anvil->Database->insert_or_update_states({ + debug => $debug, state_name => $state_name, state_host_uuid => $anvil->data->{sys}{host_uuid}, state_note => "0", @@ -16492,8 +16497,9 @@ sub shutdown # Close our own connection. 
$anvil->Database->locking({debug => $debug, release => 1}); - $anvil->data->{cache}{database_handle}{$host_uuid}->disconnect; - delete $anvil->data->{cache}{database_handle}{$host_uuid}; + + # Disconnect from all databases and then stop the daemon, then reconnect. + $anvil->Database->disconnect({debug => $debug}); # Stop the daemon. my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); @@ -16504,6 +16510,10 @@ sub shutdown $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); } + # Reconnect + $anvil->refresh(); + $anvil->Database->connect({debug => $debug}); + return(0); } diff --git a/share/words.xml b/share/words.xml index e33e6836..bfc3fe76 100644 --- a/share/words.xml +++ b/share/words.xml @@ -3131,6 +3131,7 @@ We will sleep a bit and try again. [ Warning ] - A no-longer active PID: [#!variable!pid!#] had marked our database as "in_use", but the PID is gone now. Reaping the flag. [ Warning ] - We waited for: [#!variable!wait_time!#] seconds for all users of the local database to exit. Giving up waiting and taking the database down now. [ Warning ] - The command: [#!variable!command!#] is still using our database. + [ Warning ] - While evaluating database shutdown, the host UUID: [#!variable!host_uuid!#] was not yet found in the database on host: [#!variable!db_uuid!#]. DB shutdown will not happen until all hosts are in all DBs. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 8a773b6e..69fd890a 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -250,7 +250,7 @@ while(1) check_ram($anvil); # Disconnect from the database(s) and sleep now. - $anvil->Database->disconnect(); + $anvil->Database->disconnect({debug => 2}); sleep(2); } @@ -621,71 +621,76 @@ sub handle_periodic_tasks host_uuid => $host_uuid, }}); - # Are we a Striker and is there two or more connections? If so, evaluate if we should shut down our - # database. 
+ # Are we a Striker and are there two or more connections? If so, evaluate if we should shut + # down our database. if ($host_type eq "striker") { if ($anvil->data->{sys}{database}{connections} > 1) { - # Sort by UUID, skip the first, and see if we're one of the others. - my $first_uuid = ""; + # Make sure that all active databases are in the hosts table. If they're + # not, we're still early in setup. To do this, we create an array of hosts + # and then query both/all DBs to ensure they all have all hosts. + my $all_in_hosts = 1; + my $db_hosts = []; foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }}); - if (not $first_uuid) - { - $first_uuid = $uuid; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }}); - - # Skip the first UUID so it doesn't evaluate for shutdown. - next; - } - elsif ($uuid eq $host_uuid) + push @{$db_hosts}, $uuid; + } + foreach my $db_uuid (@{$db_hosts}) + { + my $query = "SELECT COUNT(*) FROM hosts WHERE host_uuid = ".$anvil->Database->quote($db_uuid).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:db_uuid' => $db_uuid, + 's2:query' => $query, + }}); + foreach my $host_uuid (@{$db_hosts}) { - # This won't return until we're down. - $anvil->Database->shutdown({debug => 2}); -=cut - ### TODO: We need to have a way to tell clients to disconnect - ### and then shutdown cleanly. This "Wait for an hour" - ### is a kludge. - # This is us, Have we been up for at least an hour? 
- my $uptime = $anvil->Get->uptime(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }}); - if ($uptime > 3600) + my $count = $anvil->Database->query({debug => 2, uuid => $db_uuid, query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:host_uuid' => $host_uuid, + 's2:db_uuid' => $db_uuid, + 's2:count' => $count, + }}); + if (not $count) { - # backup and shut down. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"}); - - # Switch the read_uuid and then close - $anvil->data->{sys}{database}{read_uuid} = $first_uuid; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid}, + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0143", variables => { + db_uuid => $db_uuid, + host_uuid => $host_uuid, }}); - # Disconnect - $anvil->data->{cache}{database_handle}{$uuid}->disconnect; - delete $anvil->data->{cache}{database_handle}{$uuid}; - - # Create a backup, this is useful also for setting - # the mtime of the last time we were up. - my $dump_file = $anvil->Database->backup_database({debug => 3}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }}); + $all_in_hosts = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_in_hosts => $all_in_hosts }}); + } + } + } + + # Sort by UUID, skip the first, and see if we're one of the others. 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_in_hosts => $all_in_hosts }}); + if ($all_in_hosts) + { + my $first_uuid = ""; + foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }}); + if (not $first_uuid) + { + $first_uuid = $uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }}); - # Stop the daemon - my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }}); - if ($return_code eq "0") - { - # Stopped the daemon. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); - } + # Skip the first UUID so it doesn't evaluate for + # shutdown. + next; + } + elsif ($uuid eq $host_uuid) + { + # This won't return until we're down. + $anvil->Database->shutdown({debug => 2}); } -=cut } } } - # If we're the active database, dump out database out and rsync it to our peers. + # If we're the active database, dump our database out and rsync it to our peers. my $peers = keys %{$anvil->data->{database}}; my $connections = $anvil->data->{sys}{database}{connections}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {