Reworked Database->_test_access to do a general reconnect

* Before, it would try to reconnect to just the lost DB, which could
  trigger an error.

Signed-off-by: Madison Kelly <mkelly@alteeve.com>
main
Madison Kelly 7 months ago
parent 574b2dccae
commit 9db9f81104
  1. 143
      Anvil/Tools/Database.pm
  2. 4
      share/words.xml

@ -20659,11 +20659,11 @@ sub _mark_database_as_behind
=head2 _test_access =head2 _test_access
This method takes a database UUID and tests the connection to it using the DBD 'ping' method. If it fails, open references to the database are removed or replaced, then an attempt to reconnect is made. This method takes a database UUID and tests the connection to it using the DBD 'ping' method. If it fails, the database connections will be refreshed. If after this there is still no connection, C<< 1 >> is returned. If the connection is up (immediately or after reconnect), C<< 0 >> is returned.
This exists to handle the loss of a database mid-run where a normal query, which isn't wrapped in a query, could hang indefinitely. This exists to handle the loss of a database mid-run where a normal query, which isn't wrapped in a query, could hang indefinitely.
B<< Note >>: If there is no active handle, this returns 0 immediately. B<< Note >>: If there is no active handle, this returns C<< 1 >> immediately without trying to reconnect.
=cut =cut
sub _test_access sub _test_access
@ -20681,9 +20681,11 @@ sub _test_access
}}); }});
# If the handle is down, return 0. # If the handle is down, return 0.
my $problem = 1;
if ((not exists $anvil->data->{cache}{database_handle}{$uuid}) or (not $anvil->data->{cache}{database_handle}{$uuid})) if ((not exists $anvil->data->{cache}{database_handle}{$uuid}) or (not $anvil->data->{cache}{database_handle}{$uuid}))
{ {
return(0); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
return($problem);
} }
# Make logging code a little cleaner # Make logging code a little cleaner
@ -20693,28 +20695,6 @@ sub _test_access
# Log our test # Log our test
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0087", variables => { server => $say_server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0087", variables => { server => $say_server }});
# TODO: Is there a use for this anymore?
if (0)
{
# Ping works. Try a quick test query.
my $query = "SELECT 1";
my $DBreq = $anvil->data->{cache}{database_handle}{$uuid}->prepare($query) or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0075", variables => {
query => $query,
server => $say_server,
db_error => $DBI::errstr,
}});
# Give the test query a few seconds to respond, just in case we have some latency to a remote DB.
alarm(10);
$DBreq->execute() or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0076", variables => {
query => $query,
server => $say_server,
db_error => $DBI::errstr,
}});
# If we're here, we made contact.
alarm(0);
}
# Check using ping. Returns '1' on success, '0' on fail. # Check using ping. Returns '1' on success, '0' on fail.
alarm(120); alarm(120);
my $connected = $anvil->data->{cache}{database_handle}{$uuid}->ping(); my $connected = $anvil->data->{cache}{database_handle}{$uuid}->ping();
@ -20722,107 +20702,50 @@ sub _test_access
alarm(0); alarm(0);
if (not $connected) if (not $connected)
{ {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0192", variables => { server => $say_server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0192", variables => { server => $say_server }});
# Try to reconnect. # Try to reconnect.
$anvil->data->{sys}{database}{connections}--; $anvil->Database->reconnect({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::connections" => $anvil->data->{sys}{database}{connections} }});
# If this was the DB we were reading from or that the use_db_handle matches, and another DB
# appears to still be up, switch to one of the others.
if ($anvil->data->{sys}{database}{connections})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"sys::database::use_handle" => $anvil->Database->read, "sys::database::connections" => $anvil->data->{sys}{database}{connections},
"cache::database_handle::${uuid}" => $anvil->data->{cache}{database_handle}{$uuid}, "cache::database_handle::${uuid}" => $anvil->data->{cache}{database_handle}{$uuid},
}}); }});
if ($anvil->Database->read eq $anvil->data->{cache}{database_handle}{$uuid})
{
foreach my $this_uuid (keys %{$anvil->data->{cache}{database_handle}})
{
# We don't test this connection because, if it's down, we'll know
# when it is tested.
my $database_name = defined $anvil->data->{database}{$this_uuid}{name} ? $anvil->data->{database}{$this_uuid}{name} : "anvil";
my $say_server = $anvil->data->{database}{$this_uuid}{host}.":".$anvil->data->{database}{$this_uuid}{port}." -> ".$database_name;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0193", variables => { server => $say_server }});
$anvil->Database->read({set => $anvil->data->{cache}{database_handle}{$this_uuid}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 'anvil->Database->read' => $anvil->Database->read }});
last;
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { if ($anvil->data->{cache}{database_handle}{$uuid})
uuid => $uuid,
"sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid},
}});
if ($uuid eq $anvil->data->{sys}{database}{read_uuid})
{
# We were reading from this DB, switch.
foreach my $this_uuid (keys %{$anvil->data->{cache}{database_handle}})
{ {
# We don't test this connection because, if it's down, we'll know alarm(120);
# when it is tested. my $connected = $anvil->data->{cache}{database_handle}{$uuid}->ping();
my $database_name = defined $anvil->data->{database}{$this_uuid}{name} ? $anvil->data->{database}{$this_uuid}{name} : "anvil"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { connected => $connected }});
my $say_server = $anvil->data->{database}{$this_uuid}{host}.":".$anvil->data->{database}{$this_uuid}{port}." -> ".$database_name; alarm(0);
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0194", variables => { server => $say_server }});
$anvil->data->{sys}{database}{read_uuid} = $this_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }});
last;
}
}
if ($connected)
{
# We reconnected.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0854", variables => { server => $say_server }});
$problem = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
return($problem);
} }
else else
{ {
# We're in trouble if we don't reconnect... # The target DB is gone.
$anvil->Database->read({set => "delete"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0179", variables => { server => $say_server }});
$anvil->data->{sys}{database}{read_uuid} = ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return($problem);
'anvil->Database->read' => $anvil->Database->read,
"sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid},
}});
} }
# Delete the old handle and then try to reconnect. If the reconnect succeeds, and this is the
# local database, this database will be re-selected as default for reads.
delete $anvil->data->{cache}{database_handle}{$uuid};
my $delay = 5;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0195", variables => {
delay => $delay,
server => $say_server,
}});
sleep $delay;
$anvil->Database->connect({debug => $debug, db_uuid => $uuid});
# If we're down to '0' databases, error out.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::connections" => $anvil->data->{sys}{database}{connections} }});
if (not $anvil->data->{sys}{database}{connections})
{
# No connections are left.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0366"});
# It's possible the network was just reconfigured, and they were trying to updated a
# job in the database. If so, this failure can be hit. To handle this, we'll check
# if 'sys::reboot' is set. If so, we'll reboot now.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::reboot" => $anvil->data->{sys}{reboot} }});
if ($anvil->data->{sys}{reboot})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0196"});
my $shell_call = $anvil->data->{path}{exe}{systemctl}." reboot";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code }});
} }
return(1); else
{
# The target DB is gone.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0179", variables => { server => $say_server }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
return($problem);
} }
} }
# Success! # Success!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0088"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0088"});
$problem = 0;
return(0); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
return($problem);
} }

@ -1991,7 +1991,7 @@ The database connection error was:
<key name="log_0189">Database archiving is disabled, skipping archive checks.</key> <key name="log_0189">Database archiving is disabled, skipping archive checks.</key>
<key name="log_0190">Peer: [#!variable!peer!#], database: [#!variable!name!#], password: [#!variable!password!#], host UUID: [#!variable!uuid!#]</key> <key name="log_0190">Peer: [#!variable!peer!#], database: [#!variable!name!#], password: [#!variable!password!#], host UUID: [#!variable!uuid!#]</key>
<key name="log_0191">Connection only to: [#!variable!db_uuid!#], skipping: [#!variable!uuid!#].</key> <key name="log_0191">Connection only to: [#!variable!db_uuid!#], skipping: [#!variable!uuid!#].</key>
<key name="log_0192">The connection to the database: [#!variable!server!#] has failed. Will attempt to reconnect.</key> <key name="log_0192">The connection to the database: [#!variable!server!#] has failed. Will attempt to reconnect to databases.</key>
<key name="log_0193">Switching the default database handle to use the database: [#!variable!server!#] prior to reconnect attempt.</key> <key name="log_0193">Switching the default database handle to use the database: [#!variable!server!#] prior to reconnect attempt.</key>
<key name="log_0194">Switching the default database to read from to the database: [#!variable!server!#] prior to reconnect attempt.</key> <key name="log_0194">Switching the default database to read from to the database: [#!variable!server!#] prior to reconnect attempt.</key>
<key name="log_0195">Ready to try to reconnect to: [#!variable!server!#], but delaying for: [#!variable!delay!#] seconds to give the database a chance to come back online in case this is a transient issue.</key> <key name="log_0195">Ready to try to reconnect to: [#!variable!server!#], but delaying for: [#!variable!delay!#] seconds to give the database a chance to come back online in case this is a transient issue.</key>
@ -2741,6 +2741,7 @@ old key: [#!variable!old_key!#]
new key: [#!variable!new_key!#]</key> new key: [#!variable!new_key!#]</key>
<key name="log_0852">Finished configuring bonds.</key> <key name="log_0852">Finished configuring bonds.</key>
<key name="log_0853">Now configuring bridges.</key> <key name="log_0853">Now configuring bridges.</key>
<key name="log_0854">The connection to the database: [#!variable!server!#] has been restored!</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -4203,6 +4204,7 @@ We will try to proceed anyway.</key>
<key name="warning_0176">[ Warning ] - The DB query: [#!variable!query!#] failed with the error: [#!variable!error!#].</key> <key name="warning_0176">[ Warning ] - The DB query: [#!variable!query!#] failed with the error: [#!variable!error!#].</key>
<key name="warning_0177">[ Warning ] - SQL quoting string: [#!variable!string!#] failed with the error: [#!variable!error!#].</key> <key name="warning_0177">[ Warning ] - SQL quoting string: [#!variable!string!#] failed with the error: [#!variable!error!#].</key>
<key name="warning_0178">[ Warning ] - About to run 'anvil-configure-host'. This is likely going to take the network down, so we will hold here until this job is complete.</key> <key name="warning_0178">[ Warning ] - About to run 'anvil-configure-host'. This is likely going to take the network down, so we will hold here until this job is complete.</key>
<key name="warning_0179">[ Warning ] - The connection to the database: [#!variable!server!#] was not restored, unable to use this database.</key>
</language> </language>
<!-- 日本語 --> <!-- 日本語 -->

Loading…
Cancel
Save