From 3fd0db15bfbc0193eb2b3e8e899c592ef6708829 Mon Sep 17 00:00:00 2001 From: Digimer Date: Fri, 11 Mar 2022 21:46:51 -0500 Subject: [PATCH 1/7] * This rather heavily reworks how database shutdowns works. It adds much more intelligent shutdown, tracking who is using the database, being able to mark a database as "offline" and waiting for users of the database to disconnect before it shuts down. * Also removed the variables for the database name and DB user name, setting them statically now. * Created Database->shutdown() to more kindly stop a local database server. * Added 'check_db_in_use_states()' to anvil-daemon to clean any stale entries marking a database as in use. Signed-off-by: Digimer --- Anvil/Tools.pm | 6 - Anvil/Tools/Database.pm | 388 ++++++++++++++++++++++++++++++------ cgi-bin/striker | 14 +- notes | 4 + share/words.xml | 9 +- tools/anvil-daemon | 69 ++++++- tools/striker-manage-peers | 4 +- tools/striker-prep-database | 4 +- 8 files changed, 423 insertions(+), 75 deletions(-) diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index bd24705d..5b235a5f 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -874,15 +874,9 @@ sub _set_defaults locking_reap_age => 300, log_transactions => 0, maximum_batch_size => 25000, - # NOTE: Do NOT change this unless you are certain all machines that use this host - # have been likewise updated! - name => "anvil", read_uuid => "", test_table => "hosts", timestamp => "", - # NOTE: Do NOT change this unless you are certain all machines that use this host - # have been likewise updated! 
- user => "admin", use_handle => "", }, host_type => "", diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index b7713996..f4b51b93 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -91,6 +91,7 @@ my $THIS_FILE = "Database.pm"; # read_variable # refresh_timestamp # resync_databases +# shutdown # update_host_status # write # _add_to_local_config @@ -333,9 +334,9 @@ sub backup_database } my $start_time = time; - my $dump_file = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$anvil->Get->host_uuid().".sql"; + my $dump_file = $anvil->data->{path}{directories}{pgsql}."/anvil_db_dump.".$anvil->Get->host_uuid().".sql"; $dump_file =~ s/\/\//\//g; - my $dump_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." ".$anvil->data->{sys}{database}{name}." > ".$dump_file."\""; + my $dump_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." anvil > ".$dump_file."\""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { dump_file => $dump_file, dump_call => $dump_call, @@ -1032,7 +1033,7 @@ sub configure_pgsql # Does the database user exist? my $create_user = 1; - my $database_user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; + my $database_user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : "admin"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_user => $database_user }}); if (not $database_user) { @@ -1099,7 +1100,7 @@ sub configure_pgsql # Create the database, if needed. my $create_database = 1; - my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; + my $database_name = defined $anvil->data->{database}{$uuid}{name} ? 
$anvil->data->{database}{$uuid}{name} : "anvil"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_name => $database_name }}); (my $database_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT datname FROM pg_catalog.pg_database;'\"", source => $THIS_FILE, line => __LINE__}); @@ -1329,10 +1330,6 @@ sub connect $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::host_uuid" => $anvil->data->{sys}{host_uuid} }}); } - # This will be used in a few cases where the local DB ID is needed (or the lack of it being set - # showing we failed to connect to the local DB). - $anvil->data->{sys}{database}{local_uuid} = ""; - # This will be set to '1' if either DB needs to be initialized or if the last_updated differs on any node. $anvil->data->{sys}{database}{resync_needed} = 0; @@ -1377,6 +1374,8 @@ sub connect $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { uuid => $uuid }}); next if ((not $uuid) or (not $anvil->Validate->uuid({uuid => $uuid}))); + # Have we been asked to connect to a specific DB? If so, and if this isn't the requested + # UUID, skip it. if (($db_uuid) && ($db_uuid ne $uuid)) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0191", variables => { @@ -1406,6 +1405,9 @@ sub connect password => $anvil->Log->is_secure($password), }}); + my $is_local = $anvil->Network->is_local({debug => $debug, host => $host}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_local => $is_local }}); + # If there's no password, skip. if (not $password) { @@ -1417,7 +1419,7 @@ sub connect # usual), set it as if we had read it from the config file using the default. 
if (not $anvil->data->{database}{$uuid}{name}) { - $anvil->data->{database}{$uuid}{name} = $anvil->data->{sys}{database}{name}; + $anvil->data->{database}{$uuid}{name} = "anvil"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "database::${uuid}::name" => $anvil->data->{database}{$uuid}{name} }}); } @@ -1512,6 +1514,7 @@ sub connect AutoCommit => 1, pg_enable_utf8 => 1 }); }; + $test = "" if not defined $test; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 's1:test' => $test, 's2:$@' => $@, @@ -1579,8 +1582,6 @@ sub connect elsif ($dbh =~ /^DBI::db=HASH/) { # Woot! - $anvil->data->{sys}{database}{connections}++; - push @{$successful_connections}, $uuid; $anvil->data->{cache}{database_handle}{$uuid} = $dbh; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { dbh => $dbh, @@ -1600,6 +1601,11 @@ sub connect $anvil->Database->read({set => $dbh}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 'anvil->Database->read' => $anvil->Database->read }}); } + if (not $anvil->data->{sys}{database}{read_uuid}) + { + $anvil->data->{sys}{database}{read_uuid} = $uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }}); + } # Read the DB identifier and then check that we've not already connected to this DB. my $query = "SELECT system_identifier FROM pg_control_system();"; @@ -1661,16 +1667,52 @@ sub connect "cache::database_handle::${uuid}" => $anvil->data->{cache}{database_handle}{$uuid}, }}); + # Before I continue, see if this database is going offline. 
+ my ($active_value, undef, undef) = $anvil->Database->read_variable({ + debug => $debug, + uuid => $uuid, + variable_name => "database::".$uuid."::active", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { active_value => $active_value }}); + if (not $active_value) + { + # If we're "retry", we just started up. + if (($retry) && ($is_local)) + { + # Set the variable saying we're active. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0698"}); + my $variable_uuid = $anvil->Database->insert_or_update_variables({ + uuid => $uuid, + variable_name => "database::".$uuid."::active", + variable_value => "1", + variable_default => "0", + variable_description => "striker_0294", + variable_section => "database", + variable_source_uuid => "NULL", + variable_source_table => "", + }); + } + else + { + # Don't use this database. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0699", variables => { host => $uuid }}); + $anvil->data->{cache}{database_handle}{$uuid}->disconnect; + delete $anvil->data->{cache}{database_handle}{$uuid}; + + if ($anvil->data->{sys}{database}{read_uuid} eq $uuid) + { + $anvil->data->{sys}{database}{read_uuid} = ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }}); + } + next; + } + } + # Set the first ID to be the one I read from later. Alternatively, if this host is # local, use it. 
- if (($host eq $anvil->Get->host_name) or - ($host eq $anvil->Get->short_host_name) or - ($host eq "localhost") or - ($host eq "127.0.0.1") or - (not $anvil->data->{sys}{database}{read_uuid})) + if (($is_local) or (not $anvil->data->{sys}{database}{read_uuid})) { $anvil->data->{sys}{database}{read_uuid} = $uuid; - $anvil->data->{sys}{database}{local_uuid} = $uuid; $anvil->Database->read({set => $anvil->data->{cache}{database_handle}{$uuid}}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { @@ -1696,15 +1738,38 @@ sub connect 'anvil->Database->read' => $anvil->Database->read, "sys::database::timestamp" => $anvil->data->{sys}{database}{timestamp}, }}); + + # Record this as successful + $anvil->data->{sys}{database}{connections}++; + push @{$successful_connections}, $uuid; } # Before we try to connect, see if this is a local database and, if so, make sure it's setup. - my $is_local = $anvil->Network->is_local({debug => $debug, host => $host}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_local => $is_local }}); if ($is_local) { $anvil->data->{sys}{database}{read_uuid} = $uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }}); + + # If we're a striker, set the variable saying we're active if we need to. 
+ my ($active_value, undef, undef) = $anvil->Database->read_variable({ + debug => $debug, + uuid => $uuid, + variable_name => "database::".$uuid."::active", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { active_value => $active_value }}); + if (not $active_value) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0698"}); + my $variable_uuid = $anvil->Database->insert_or_update_variables({ + variable_name => "database::".$uuid."::active", + variable_value => "1", + variable_default => "0", + variable_description => "striker_0294", + variable_section => "database", + variable_source_uuid => "NULL", + variable_source_table => "", + }); + } } elsif (not $anvil->data->{sys}{database}{read_uuid}) { @@ -1750,7 +1815,7 @@ sub connect } # If we're a striker and no connections were found, start our database. - if (($local_host_type eq "striker") && (not $anvil->data->{sys}{database}{connections}) && ($db_count > 1)) + if (($local_host_type eq "striker") && (not $anvil->data->{sys}{database}{connections})) { # Tell the user we're going to try to load and start. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "log_0650"}); @@ -1760,7 +1825,7 @@ sub connect my $backup_age = 0; my $youngest_dump = 0; my $directory = $anvil->data->{path}{directories}{pgsql}; - my $db_name = $anvil->data->{sys}{database}{name}; + my $db_name = "anvil"; my $dump_files = []; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { directory => $directory }}); @@ -5599,7 +5664,7 @@ sub initialize }}); # This just makes some logging cleaner below. - my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; + my $database_name = defined $anvil->data->{database}{$uuid}{name} ? 
$anvil->data->{database}{$uuid}{name} : "anvil"; my $say_server = $anvil->data->{database}{$uuid}{host}.":".$anvil->data->{database}{$uuid}{port}." -> ".$database_name; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { say_server => $say_server }}); @@ -5644,7 +5709,7 @@ sub initialize }}); # Read in the SQL file and replace #!variable!name!# with the database owner name. - my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; + my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : "admin"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { user => $user }}); my $sql = $anvil->Storage->read_file({file => $sql_file}); @@ -14219,7 +14284,7 @@ sub load_database } # Drop the existing database. - my $drop_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{dropdb}." ".$anvil->data->{sys}{database}{name}."\""; + my $drop_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{dropdb}." anvil\""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { drop_call => $drop_call }}); $output = ""; $return_code = ""; @@ -14259,7 +14324,7 @@ sub load_database } # Recreate the DB. - my $create_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createdb}." --owner ".$anvil->data->{sys}{database}{user}." ".$anvil->data->{sys}{database}{name}."\""; + my $create_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createdb}." --owner "."admin"." anvil\""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_call => $create_call }}); $output = ""; $return_code = ""; @@ -14287,7 +14352,7 @@ sub load_database } # Finally, load the database. - my $load_call = $anvil->data->{path}{exe}{su}." 
- postgres -c \"".$anvil->data->{path}{exe}{psql}." ".$anvil->data->{sys}{database}{name}." < ".$load_file."\""; + my $load_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." anvil < ".$load_file."\""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { load_call => $load_call }}); $output = ""; $return_code = ""; @@ -14458,7 +14523,7 @@ sub locking # If I have been asked to check, we will return the variable_uuid if a lock is set. if ($check) { - my ($lock_value, $variable_uuid, $modified_date) = $anvil->Database->read_variable({variable_name => $variable_name}); + my ($lock_value, $variable_uuid, $modified_date) = $anvil->Database->read_variable({debug => $debug, variable_name => $variable_name}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { lock_value => $lock_value, variable_uuid => $variable_uuid, @@ -14473,7 +14538,7 @@ sub locking { # We check to see if there is a lock before we clear it. This way we don't log that we # released a lock unless we really released a lock. - my ($lock_value, $variable_uuid, $modified_date) = $anvil->Database->read_variable({variable_name => $variable_name}); + my ($lock_value, $variable_uuid, $modified_date) = $anvil->Database->read_variable({debug => $debug, line => __LINE__, variable_name => $variable_name}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { lock_value => $lock_value, variable_uuid => $variable_uuid, @@ -14532,7 +14597,7 @@ sub locking }}); # Log that we've renewed the lock. 
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0044", variables => { host => $anvil->Get->host_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0044", variables => { host => $anvil->Get->host_name }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { set => $set }}); return($set); @@ -14547,7 +14612,7 @@ sub locking $waiting = 0; # See if we had a lock. - my ($lock_value, $variable_uuid, $modified_date) = $anvil->Database->read_variable({variable_name => $variable_name}); + my ($lock_value, $variable_uuid, $modified_date) = $anvil->Database->read_variable({debug => $debug, variable_name => $variable_name}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { waiting => $waiting, lock_value => $lock_value, @@ -15161,20 +15226,33 @@ sub mark_active return(0); } - my $value = "false"; - if ($set) - { - $value = "true"; - } + my $value = $set ? 1 : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { value => $value }}); - my $state_uuid = $anvil->Database->insert_or_update_states({ - state_name => "db_in_use", - state_host_uuid => $anvil->data->{sys}{host_uuid}, - state_note => $value, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { state_uuid => $state_uuid }}); - return($state_uuid); + foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) + { + # TODO: When unsetting, should we just go directly to a deletion? This method gets us the + # state_uuid though, which is convenient. 
+ my $pid = $$; + my $state_name = "db_in_use::".$uuid."::".$pid; + my $state_uuid = $anvil->Database->insert_or_update_states({ + state_name => $state_name, + state_host_uuid => $anvil->data->{sys}{host_uuid}, + state_note => $value, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { state_uuid => $state_uuid }}); + + # Being a state, if we're clearing, now delete the entry. + # NOTE: The 'state' table has no history schema + if (not $set) + { + # Broadly clear all states that are '0' now. + my $query = "DELETE FROM states WHERE state_name LIKE 'db_in_use%' AND state_note != '1';"; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0124", variables => { query => $query }}); + $anvil->Database->write({debug => $debug, uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + } + } + return(0); } @@ -15374,13 +15452,28 @@ sub query }}); # Make logging code a little cleaner - my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; - my $say_server = $anvil->data->{database}{$uuid}{host}.":".$anvil->data->{database}{$uuid}{port}." -> ".$database_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "s1:database::${uuid}::name" => $anvil->data->{database}{$uuid}{name}, + "s2:database::${uuid}::host" => $anvil->data->{database}{$uuid}{host}, + "s3:database::${uuid}::port" => $anvil->data->{database}{$uuid}{port}, + }}); + my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : "anvil"; + my $say_server = $anvil->data->{database}{$uuid}{host}.":"; + $say_server .= $anvil->data->{database}{$uuid}{port}." 
-> "; + $say_server .= $database_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "s1:database_name" => $database_name, + "s2:say_server" => $say_server, + }}); if (not $uuid) { # No database to talk to... - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0072"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0072", variables => { + query => $query, + source => $source, + line => $line, + }}); return("!!error!!"); } elsif (not defined $anvil->data->{cache}{database_handle}{$uuid}) @@ -15581,14 +15674,21 @@ sub read_variable my $variable_name = $parameter->{variable_name} ? $parameter->{variable_name} : ""; my $variable_source_uuid = $parameter->{variable_source_uuid} ? $parameter->{variable_source_uuid} : ""; my $variable_source_table = $parameter->{variable_source_table} ? $parameter->{variable_source_table} : ""; - my $uuid = $parameter->{uuid} ? $parameter->{uuid} : $anvil->data->{sys}{database}{read_uuid}; + my $uuid = $parameter->{uuid} ? 
$parameter->{uuid} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + uuid => $uuid, variable_uuid => $variable_uuid, variable_name => $variable_name, variable_source_uuid => $variable_source_uuid, variable_source_table => $variable_source_table, }}); + if ((not $uuid) && ($anvil->data->{sys}{database}{read_uuid})) + { + $uuid = $anvil->data->{sys}{database}{read_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { uuid => $uuid }}); + } + if (not $variable_source_uuid) { $variable_source_uuid = "NULL"; @@ -15638,7 +15738,7 @@ AND my $variable_value = ""; my $mtime = ""; my $modified_date = ""; - my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $results = $anvil->Database->query({debug => $debug, uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); my $count = @{$results}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { results => $results, @@ -15788,18 +15888,18 @@ sub resync_databases $column4 = $1."y_uuid"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { column4 => $column4 }}); } - my $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = ".$anvil->Database->quote($anvil->data->{sys}{database}{name})." AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND column_name = ".$anvil->Database->quote($column1).";"; + my $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = 'anvil' AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." 
AND data_type = 'uuid' AND is_nullable = 'NO' AND column_name = ".$anvil->Database->quote($column1).";"; if ($column4) { - $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = ".$anvil->Database->quote($anvil->data->{sys}{database}{name})." AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND (column_name = ".$anvil->Database->quote($column1)." OR column_name = ".$anvil->Database->quote($column2)." OR column_name = ".$anvil->Database->quote($column3)." OR column_name = ".$anvil->Database->quote($column4).");"; + $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = 'anvil' AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND (column_name = ".$anvil->Database->quote($column1)." OR column_name = ".$anvil->Database->quote($column2)." OR column_name = ".$anvil->Database->quote($column3)." OR column_name = ".$anvil->Database->quote($column4).");"; } elsif ($column3) { - $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = ".$anvil->Database->quote($anvil->data->{sys}{database}{name})." AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND (column_name = ".$anvil->Database->quote($column1)." OR column_name = ".$anvil->Database->quote($column2)." OR column_name = ".$anvil->Database->quote($column3).");"; + $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = 'anvil' AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND (column_name = ".$anvil->Database->quote($column1)." OR column_name = ".$anvil->Database->quote($column2)." 
OR column_name = ".$anvil->Database->quote($column3).");"; } elsif ($column2) { - $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = ".$anvil->Database->quote($anvil->data->{sys}{database}{name})." AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND (column_name = ".$anvil->Database->quote($column1)." OR column_name = ".$anvil->Database->quote($column2).");"; + $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = 'anvil' AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND (column_name = ".$anvil->Database->quote($column1)." OR column_name = ".$anvil->Database->quote($column2).");"; } $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0124", variables => { query => $query }}); my $uuid_column = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; @@ -16206,6 +16306,182 @@ sub resync_databases } +=head2 shutdown + +This gracefully shuts down the local database, waiting for active connections to exit before doing so. This call only works on a Striker dashboard. It creates a dump file of the database as part of the shutdown. It always returns C<< 0 >>. + +B<< Note >>: This will not return until the database is stopped. This can take some time as it waits for all connections to close, with a C<< 600 >> second (ten minute) timeout. + +This method takes no parameters. + +=cut +sub shutdown +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->shutdown()" }}); + + # Are we a striker? 
+ my $host_type = $anvil->Get->host_type(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_type => $host_type }}); + if ($host_type ne "striker") + { + # Not a dashboard, nothing to do. + return(0); + } + + # Is the local database running? + my $running = $anvil->System->check_daemon({ + debug => $debug, + daemon => $anvil->data->{sys}{daemon}{postgresql}, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { running => $running }}); + if (not $running) + { + # Already stopped. + return(0); + } + + # Set the variable to say we're shutting down. + my $host_uuid = $anvil->Database->quote($anvil->Get->host_uuid); + $host_uuid =~ s/^'(.*)'$/$1/; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid }}); + my $variable_uuid = $anvil->Database->insert_or_update_variables({ + variable_name => "database::".$host_uuid."::active", + variable_value => "0", + variable_default => "0", + variable_description => "striker_0294", + variable_section => "database", + variable_source_uuid => "NULL", + variable_source_table => "", + }); + + # Now wait for all clients to disconnect. 
+ my $waiting = 1; + my $query = "SELECT state_uuid, state_name FROM states WHERE state_name LIKE 'db_in_use::".$host_uuid."::%' AND state_note = '1';"; + my $wait_time = 600; + my $stop_waiting = time + $wait_time; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:time' => time, + 's2:wait_time' => $wait_time, + 's3:stop_waiting' => $stop_waiting, + }}); + while($waiting) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $pids = ""; + my $count = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { count => $count }}); + if ($count) + { + my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + results => $results, + count => $count, + }}); + if ($count) + { + # Do the same checks we do in anvil-daemon + $anvil->System->pids(); + foreach my $row (@{$results}) + { + my $state_uuid = $row->[0]; + my $state_name = $row->[1]; + my $state_pid = ($state_name =~ /db_in_use::.*?::(.*)$/)[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:state_uuid' => $state_uuid, + 's2:state_name' => $state_name, + 's3:state_pid' => $state_pid, + 's4:our_pid' => $$, + }}); + if ($state_pid eq $$) + { + # This is us, ignore it. + next; + } + if (not exists $anvil->data->{pids}{$state_pid}) + { + # Reap the stale 'db_in_use' state. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0140", variables => { pid => $state_pid }}); + + my $query = "DELETE FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";"; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0124", variables => { query => $query }}); + $anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__}); + } + else + { + my $command = $anvil->data->{pids}{$state_pid}{command}; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0142", variables => { command => $command }}); + + $pids .= $state_pid.","; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pids => $pids }}); + } + } + $pids =~ s/,$//; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pids => $pids }}); + } + } + + # If there's no count, we're done. 
+ if (not $pids) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0697"}); + $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { waiting => $waiting }}); + } + elsif (time > $stop_waiting) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0141", variables => { wait_time => $wait_time }}); + $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { waiting => $waiting }}); + } + else + { + sleep 3; + } + } + + # Delete all jobs on our local database, and then stop the DB + $query = "DELETE FROM history.jobs; DELETE FROM jobs;"; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0124", variables => { query => $query }}); + $anvil->Database->write({debug => $debug, uuid => $host_uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + + # Mark ourself as no longer using the DB + my $pid = $$; + my $state_name = "db_in_use::".$host_uuid."::".$pid; + my $state_uuid = $anvil->Database->insert_or_update_states({ + state_name => $state_name, + state_host_uuid => $anvil->data->{sys}{host_uuid}, + state_note => "0", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { state_uuid => $state_uuid }}); + + $query = "DELETE FROM states WHERE state_name LIKE 'db_in_use%' AND state_note != '1';"; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0124", variables => { query => $query }}); + $anvil->Database->write({debug => $debug, uuid => $host_uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + + # Close our own connection. 
+ $anvil->Database->locking({debug => $debug, release => 1}); + $anvil->data->{cache}{database_handle}{$host_uuid}->disconnect; + delete $anvil->data->{cache}{database_handle}{$host_uuid}; + + # Stop the daemon. + my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); + if ($return_code eq "0") + { + # Stopped the daemon. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); + } + + return(0); +} + + =head2 update_host_status This is a variant on C<< insert_or_update_hosts >> designed only to update the power status of a host. @@ -16330,7 +16606,7 @@ sub write ### NOTE: The careful checks below are to avoid autovivication biting our arses later. # Make logging code a little cleaner - my $database_name = $anvil->data->{sys}{database}{name}; + my $database_name = "anvil"; my $say_server = $anvil->Words->string({key => "log_0129"}); if (($uuid) && (exists $anvil->data->{database}{$uuid}) && (defined $anvil->data->{database}{$uuid}{name}) && ($anvil->data->{database}{$uuid}{name})) { @@ -17224,7 +17500,7 @@ sub _find_column return('!!error!!') if not $table; - my $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = ".$anvil->Database->quote($anvil->data->{sys}{database}{name})." AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." AND data_type = 'uuid' AND is_nullable = 'NO' AND column_name LIKE '\%_".$search_column."';"; + my $query = "SELECT column_name FROM information_schema.columns WHERE table_catalog = 'anvil' AND table_schema = 'public' AND table_name = ".$anvil->Database->quote($table)." 
AND data_type = 'uuid' AND is_nullable = 'NO' AND column_name LIKE '\%_".$search_column."';"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); my $count = @{$results}; @@ -17636,7 +17912,7 @@ sub _test_access } # Make logging code a little cleaner - my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; + my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : "anvil"; my $say_server = $anvil->data->{database}{$uuid}{host}.":".$anvil->data->{database}{$uuid}{port}." -> ".$database_name; # Log our test @@ -17689,7 +17965,7 @@ sub _test_access { # We don't test this connection because, if it's down, we'll know # when it is tested. - my $database_name = defined $anvil->data->{database}{$this_uuid}{name} ? $anvil->data->{database}{$this_uuid}{name} : $anvil->data->{sys}{database}{name}; + my $database_name = defined $anvil->data->{database}{$this_uuid}{name} ? $anvil->data->{database}{$this_uuid}{name} : "anvil"; my $say_server = $anvil->data->{database}{$this_uuid}{host}.":".$anvil->data->{database}{$this_uuid}{port}." -> ".$database_name; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0193", variables => { server => $say_server }}); @@ -17710,7 +17986,7 @@ sub _test_access { # We don't test this connection because, if it's down, we'll know # when it is tested. - my $database_name = defined $anvil->data->{database}{$this_uuid}{name} ? $anvil->data->{database}{$this_uuid}{name} : $anvil->data->{sys}{database}{name}; + my $database_name = defined $anvil->data->{database}{$this_uuid}{name} ? 
$anvil->data->{database}{$this_uuid}{name} : "anvil"; my $say_server = $anvil->data->{database}{$this_uuid}{host}.":".$anvil->data->{database}{$this_uuid}{port}." -> ".$database_name; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0194", variables => { server => $say_server }}); diff --git a/cgi-bin/striker b/cgi-bin/striker index a99e263d..12e39b89 100755 --- a/cgi-bin/striker +++ b/cgi-bin/striker @@ -6645,7 +6645,7 @@ sub process_sync_page next if not $anvil->Validate->subnet_mask({subnet_mask => $anvil->data->{network}{$local_host}{interface}{$interface}{subnet_mask}}); my ($network_type, $network_number) = ($interface =~ /^(.*?)(\d+)_/); - my $database_user = $anvil->data->{database}{$host_uuid}{user} ? $anvil->data->{database}{$host_uuid}{user} : $anvil->data->{sys}{database}{user}; + my $database_user = $anvil->data->{database}{$host_uuid}{user} ? $anvil->data->{database}{$host_uuid}{user} : "admin"; my $database_port = $anvil->data->{database}{$host_uuid}{port}; my $network_key = $network_type eq "bcn" ? "striker_0018" : "striker_0022"; my $say_network = $anvil->Words->string({key => $network_key, variables => { number => $network_number }}); @@ -6682,8 +6682,8 @@ sub process_sync_page my $host = $anvil->data->{database}{$uuid}{host} ? $anvil->data->{database}{$uuid}{host} : ""; # This should fail my $port = $anvil->data->{database}{$uuid}{port} ? $anvil->data->{database}{$uuid}{port} : 5432; - my $name = $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; - my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; + my $name = $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : "anvil"; + my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : "admin"; my $ping = $anvil->data->{database}{$uuid}{ping} ? 
$anvil->data->{database}{$uuid}{ping} : 1; my $password = $anvil->data->{database}{$uuid}{password} ? $anvil->data->{database}{$uuid}{password} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { @@ -6759,8 +6759,8 @@ sub delete_sync_peer my $uuid = $anvil->data->{cgi}{'delete'}{value}; my $host_name = $anvil->Get->host_name_from_uuid({host_uuid => $uuid}); my $host = $anvil->data->{database}{$uuid}{host} ? $anvil->data->{database}{$uuid}{host} : ""; # This should fail - my $name = $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; - my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; + my $name = $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : "anvil"; + my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : "admin"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uuid => $uuid, host_name => $host_name, @@ -6816,10 +6816,10 @@ sub add_sync_peer my ($anvil) = @_; # Break up the user, host and port. If anything goes wrong, we'll set an error and send it back. - my $pgsql_user = $anvil->data->{sys}{database}{user}; + my $pgsql_user = "admin"; my $host = $anvil->data->{cgi}{new_peer_access}{value}; my $password = $anvil->data->{cgi}{new_peer_password}{value}; - my $db_name = $anvil->data->{sys}{database}{name}; + my $db_name = "anvil"; my $ping = $anvil->data->{cgi}{new_peer_ping}{value} eq "on" ? 
1 : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pgsql_user => $pgsql_user, diff --git a/notes b/notes index d29efd84..b4e33203 100644 --- a/notes +++ b/notes @@ -655,6 +655,10 @@ virsh attach-interface win2019_test bridge ifn_bridge1 --live --model virtio # Detach a network interface: virsh detach-interface win2019_test bridge --mac 52:54:00:ee:b5:1d +# Attach disks +virsh attach-disk srv34-nas /dev/drbd/by-res/srv34-nas/1 vdb --persistent --targetbus virtio --sourcetype block --subdriver raw + + # Change the MTU of a device; ip link set mtu 9000 diff --git a/share/words.xml b/share/words.xml index d4b65861..e33e6836 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1389,7 +1389,7 @@ The database connection error was: The connection to the database: [#!variable!name!#] on host: [#!variable!host!#:#!variable!port!#] was refused. Is the database server running? The connection to the database: [#!variable!name!#] on host: [#!variable!host!#:#!variable!port!#] failed because the name could not be translated to an IP address. Is this database server's host name in '/etc/hosts'? Successfully Connected to the database: [#!variable!name!#] (id: [#!variable!uuid!#]) on host: [#!variable!host!#:#!variable!port!#]. - query() was called without a database ID to query and 'sys::database::read_uuid' doesn't contain a database ID, either. Are any databases available?]]> + query() was called without a database ID to query and 'sys::database::read_uuid' doesn't contain a database ID, either. Are any databases available? The query source was: [#!variable!source!#:#!variable!line!#] -> [#!variable!query!#].]]> query() was asked to query the database with UUID: [#!variable!uuid!#] but there is no file handle open to the database. Was the connection lost?]]> About to run: [#!variable!uuid!#]:[#!variable!query!#] Log->secure' is not set.]]> @@ -2089,6 +2089,9 @@ The file: [#!variable!file!#] needs to be updated. 
The difference is: The connection to: [#!variable!host!#] for the resource: [#!variable!resource!#] is in the connection state: [#!variable!connection_state!#]. Will try to connect to the peer and up the resource now. About to request the start of the resource: [#!variable!resource!#] on: [#!variable!host!#]. The peer: [#!variable!peer!#] is defined in the resource: [#!variable!resource!#] but we don't connect to it, ignoring it. + All clients using our database are gone, ready to stop the postgresql daemon. + [ Note ] - Marking our database as active. + [ Note ] - The Striker database host: [#!variable!host!#] is inactive, skipping it. The host name: [#!variable!target!#] does not resolve to an IP address. @@ -2781,6 +2784,7 @@ If you are comfortable that the target has changed for a known reason, you can s This is the number of bytes transmitted (tx) by a network interface since it was last started. Stay Off This is the command used to provision the referenced server. + This indicates if a Striker's DB is available to be used. #!variable!number!#/sec @@ -3124,6 +3128,9 @@ We will sleep a bit and try again. [ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail. [ Warning ] - Timed out waiting for the connections to the peers. [ Warning ] - We're using: [#!variable!ram_used!#] (#!variable!ram_used_bytes!# Bytes). but there is a job: [#!variable!job_command!#] is runnng, which might be why the RAM is high. NOT exiting while this program is running. + [ Warning ] - A no-longer active PID: [#!variable!pid!#] had marked our database as "in_use", but the PID is gone now. Reaping the flag. + [ Warning ] - We waited for: [#!variable!wait_time!#] seconds for all users of the local database to exit. Giving up waiting and taking the database down now. + [ Warning ] - The command: [#!variable!command!#] is still using our database. 
diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 98d1696e..8a773b6e 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -606,6 +606,9 @@ sub handle_periodic_tasks # Check if any files have been uploaded to /mnt/shared/incoming on striker check_incoming($anvil); + + # Check for stale db_in_use states. + check_db_in_use_states($anvil); } # Now check to see if it's time to run less frequent tasks. @@ -639,6 +642,9 @@ sub handle_periodic_tasks } elsif ($uuid eq $host_uuid) { + # This won't return until we're down. + $anvil->Database->shutdown({debug => 2}); +=cut ### TODO: We need to have a way to tell clients to disconnect ### and then shutdown cleanly. This "Wait for an hour" ### is a kludge. @@ -674,6 +680,7 @@ sub handle_periodic_tasks $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); } } +=cut } } } @@ -807,6 +814,66 @@ sub handle_periodic_tasks return(0); } +### NOTE: This logic plays out in a slightly different way in Database->shutdown(). +# Check for stale db_in_use states. +sub check_db_in_use_states +{ + my ($anvil) = @_; + + # We only reap db_in_use entries for us. 
+ $anvil->System->pids(); + my $host_uuid = $anvil->Database->quote($anvil->Get->host_uuid); + $host_uuid =~ s/^'(.*)'$/$1/; + my $query = " +SELECT + state_uuid, + state_name, + state_note +FROM + states +WHERE + state_name LIKE 'db_in_use::".$host_uuid."::%' +;"; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }}); + my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + if ($count) + { + foreach my $row (@{$results}) + { + my $state_uuid = $row->[0]; + my $state_name = $row->[1]; + my $state_note = $row->[2]; + my $state_pid = ($state_name =~ /db_in_use::.*?::(.*)$/)[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:state_uuid' => $state_uuid, + 's2:state_name' => $state_name, + 's3:state_note' => $state_note, + 's4:state_pid' => $state_pid, + }}); + + if (not exists $anvil->data->{pids}{$state_pid}) + { + # Reap the 'db_is_use'. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0140", variables => { pid => $state_pid }}); + + my $query = "DELETE FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";"; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }}); + $anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__}); + } + ### TODO: What are the chances of a PID being reused in the minute between + ### the program's death and us detecting it? Should we filter the + ### 'pids::::command' value against our programs and scan agents? 
+ } + } + + return(0); +} + # On dashboards, this checks to see if any files are in /mnt/shared/incoming and, if so, that they've been processed. sub check_incoming { @@ -1348,7 +1415,7 @@ sub prep_database my $prep_database = 1; foreach my $uuid (keys %{$anvil->data->{database}}) { - my $dump_file = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$uuid.".sql"; + my $dump_file = $anvil->data->{path}{directories}{pgsql}."/anvil_db_dump.".$uuid.".sql"; $dump_file =~ s/\/\//\//g; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }}); if (-e $dump_file) diff --git a/tools/striker-manage-peers b/tools/striker-manage-peers index 24df5a01..a7a862df 100755 --- a/tools/striker-manage-peers +++ b/tools/striker-manage-peers @@ -89,8 +89,8 @@ foreach my $host (sort {$a cmp $b} keys %{$anvil->data->{sorted}{db}}) { my $uuid = $anvil->data->{sorted}{db}{$host}; my $port = $anvil->data->{database}{$uuid}{port} ? $anvil->data->{database}{$uuid}{port} : 5432; - my $name = $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; - my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; + my $name = $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : "anvil"; + my $user = $anvil->data->{database}{$uuid}{user} ? $anvil->data->{database}{$uuid}{user} : "admin"; my $password = $anvil->data->{database}{$uuid}{password} ? $anvil->data->{database}{$uuid}{password} : ""; print $anvil->Words->string({key => "message_0032", variables => { peer => $user."\@".$host.":".$port, diff --git a/tools/striker-prep-database b/tools/striker-prep-database index abbb4034..25e99c2a 100755 --- a/tools/striker-prep-database +++ b/tools/striker-prep-database @@ -278,7 +278,7 @@ if ($local_uuid) # Does the database user exist? 
my $create_user = 1; - my $database_user = $anvil->data->{database}{$local_uuid}{user} ? $anvil->data->{database}{$local_uuid}{user} : $anvil->data->{sys}{database}{user}; + my $database_user = $anvil->data->{database}{$local_uuid}{user} ? $anvil->data->{database}{$local_uuid}{user} : "admin"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { database_user => $database_user }}); if (not $database_user) { @@ -374,7 +374,7 @@ if ($local_uuid) # Create the database, if needed. my $create_database = 1; - my $database_name = $anvil->data->{database}{$local_uuid}{name} ? $anvil->data->{database}{$local_uuid}{name} : $anvil->data->{sys}{database}{name}; + my $database_name = $anvil->data->{database}{$local_uuid}{name} ? $anvil->data->{database}{$local_uuid}{name} : "anvil"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { database_name => $database_name }}); if (not $database_name) { From 513ce3b74e4fd169127b7a6d8379e8cfdae9be15 Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 14 Mar 2022 16:40:35 -0400 Subject: [PATCH 2/7] Created 'striker-db-status' that reports the status of the databases to external tools. It's basic, but it works. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 26 +++++++++++++++++ tools/striker-db-status | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100755 tools/striker-db-status diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index f4b51b93..046d64be 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -1498,6 +1498,11 @@ sub connect } } + # This stores data used by striker-db-status + $anvil->data->{db_status}{$uuid}{access} = 0; + $anvil->data->{db_status}{$uuid}{active} = 0; + $anvil->data->{db_status}{$uuid}{details} = ""; + # Connect! 
my $dbh = ""; ### NOTE: The Database->write() method, when passed an array, will automatically disable @@ -1529,6 +1534,11 @@ sub connect name => $name, }}); + $anvil->data->{db_status}{$uuid}{details} = "error=".$@; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "db_status::${uuid}::details" => $anvil->data->{db_status}{$uuid}{details}, + }}); + push @{$failed_connections}, $uuid; my $message_key = "log_0065"; my $variables = { dbi_error => $DBI::errstr }; @@ -1588,6 +1598,11 @@ sub connect "cache::database_handle::${uuid}" => $anvil->data->{cache}{database_handle}{$uuid}, }}); + $anvil->data->{db_status}{$uuid}{access} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "db_status::${uuid}::access" => $anvil->data->{db_status}{$uuid}{access}, + }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0071", variables => { host => $host, port => $port, @@ -1691,6 +1706,11 @@ sub connect variable_source_uuid => "NULL", variable_source_table => "", }); + + $anvil->data->{db_status}{$uuid}{active} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "db_status::${uuid}::active" => $anvil->data->{db_status}{$uuid}{active}, + }}); } else { @@ -1708,6 +1728,12 @@ sub connect } } + # Still here? We're active + $anvil->data->{db_status}{$uuid}{active} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "db_status::${uuid}::active" => $anvil->data->{db_status}{$uuid}{active}, + }}); + # Set the first ID to be the one I read from later. Alternatively, if this host is # local, use it. 
if (($is_local) or (not $anvil->data->{sys}{database}{read_uuid})) diff --git a/tools/striker-db-status b/tools/striker-db-status new file mode 100755 index 00000000..bc56fcac --- /dev/null +++ b/tools/striker-db-status @@ -0,0 +1,62 @@ +#!/usr/bin/perl +# +# This is a machine parsable output of the database states. +# + +use strict; +use warnings; +use Anvil::Tools; +use Data::Dumper; +use Text::Diff; + +$| = 1; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +my $anvil = Anvil::Tools->new(); + +$anvil->Database->connect({debug => 2, check_for_resync => 0}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); +if (not $anvil->data->{sys}{database}{connections}) +{ + # No databases, exit. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, 'print' => 1, priority => "err", key => "error_0003"}); + $anvil->nice_exit({exit_code => 1}); +} + +$anvil->Get->switches(); + +print "# Access = talked to DB. 
Active = flagged as active and usable.\n"; +print "connections=".$anvil->data->{sys}{database}{connections}."\n"; +if ($anvil->data->{sys}{database}{connections}) +{ + foreach my $uuid (keys %{$anvil->data->{db_status}}) + { + my $host_name = $anvil->Get->host_name_from_uuid({host_uuid => $uuid}); + $host_name = "" if not $host_name; + my $access = $anvil->data->{db_status}{$uuid}{access}; + my $active = $anvil->data->{db_status}{$uuid}{active}; + my $details = $anvil->data->{db_status}{$uuid}{details}; + + # Show the state + print "host_name=".$host_name.",host_uuid=".$uuid.",access=".$access.",active=".$active."\n"; + + # If we want to show access failure details; +# print "host_name=".$host_name.",host_uuid=".$uuid.",access=".$access.",active=".$active; +# if (not $access) +# { +# print ",details:\n"; +# print "====\n"; +# print $details; +# print "===="; +# } +# print "\n"; + } +} + +$anvil->nice_exit({exit_code => 0}); From 0ebe589c93d1d0257c36f597e2d1bf528e6ca968 Mon Sep 17 00:00:00 2001 From: "Fabio M. Di Nitto" Date: Tue, 15 Mar 2022 05:50:32 +0100 Subject: [PATCH 3/7] Ship striker-db-status Signed-off-by: Fabio M. Di Nitto --- tools/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/Makefile.am b/tools/Makefile.am index d2d1c3ba..18dd6e5d 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -48,7 +48,8 @@ dist_sbin_SCRIPTS = \ striker-purge-target \ striker-scan-network \ striker-show-db-counts \ - striker-auto-initialize-all + striker-auto-initialize-all \ + striker-db-status fencedir = ${FASEXECPREFIX}/sbin From 7b090e16239f334744ed7487728ccf1d35fb1778 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 15 Mar 2022 22:33:42 -0400 Subject: [PATCH 4/7] * Updated Database->shutdown() to disconnect, stop the postgresql daemon, then reconnect. * Updated anvil-daemon to not stop a database until both/all DB hosts are in both/all DB's hosts table. 
Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 14 +++++- share/words.xml | 1 + tools/anvil-daemon | 107 +++++++++++++++++++++------------------- 3 files changed, 69 insertions(+), 53 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 046d64be..5bf1a5d9 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -15262,6 +15262,7 @@ sub mark_active my $pid = $$; my $state_name = "db_in_use::".$uuid."::".$pid; my $state_uuid = $anvil->Database->insert_or_update_states({ + debug => $debug, state_name => $state_name, state_host_uuid => $anvil->data->{sys}{host_uuid}, state_note => $value, @@ -16471,6 +16472,9 @@ sub shutdown } } + $host_uuid = $anvil->Get->host_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid }}); + # Delete all jobs on our local database, and then stop the DB $query = "DELETE FROM history.jobs; DELETE FROM jobs;"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0124", variables => { query => $query }}); @@ -16480,6 +16484,7 @@ sub shutdown my $pid = $$; my $state_name = "db_in_use::".$host_uuid."::".$pid; my $state_uuid = $anvil->Database->insert_or_update_states({ + debug => $debug, state_name => $state_name, state_host_uuid => $anvil->data->{sys}{host_uuid}, state_note => "0", @@ -16492,8 +16497,9 @@ sub shutdown # Close our own connection. $anvil->Database->locking({debug => $debug, release => 1}); - $anvil->data->{cache}{database_handle}{$host_uuid}->disconnect; - delete $anvil->data->{cache}{database_handle}{$host_uuid}; + + # Disconnect from all databases and then stop the daemon, then reconnect. + $anvil->Database->disconnect({debug => $debug}); # Stop the daemon. 
my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); @@ -16504,6 +16510,10 @@ sub shutdown $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); } + # Reconnect + $anvil->refresh(); + $anvil->Database->connect({debug => $debug}); + return(0); } diff --git a/share/words.xml b/share/words.xml index e33e6836..bfc3fe76 100644 --- a/share/words.xml +++ b/share/words.xml @@ -3131,6 +3131,7 @@ We will sleep a bit and try again. [ Warning ] - A no-longer active PID: [#!variable!pid!#] had marked our database as "in_use", but the PID is gone now. Reaping the flag. [ Warning ] - We waited for: [#!variable!wait_time!#] seconds for all users of the local database to exit. Giving up waiting and taking the database down now. [ Warning ] - The command: [#!variable!command!#] is still using our database. + [ Warning ] - While evaluating database shutdown, the host UUID: [#!variable!host_uuid!#] was not yet found in the database on host: [#!variable!db_uuid!#]. DB shutdown will not happen until all hosts are in all DBs. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 8a773b6e..69fd890a 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -250,7 +250,7 @@ while(1) check_ram($anvil); # Disconnect from the database(s) and sleep now. - $anvil->Database->disconnect(); + $anvil->Database->disconnect({debug => 2}); sleep(2); } @@ -621,71 +621,76 @@ sub handle_periodic_tasks host_uuid => $host_uuid, }}); - # Are we a Striker and is there two or more connections? If so, evaluate if we should shut down our - # database. + # Are we a Striker and is there two or more connections? If so, evaluate if we should shut + # down our database. if ($host_type eq "striker") { if ($anvil->data->{sys}{database}{connections} > 1) { - # Sort by UUID, skip the first, and see if we're one of the others. - my $first_uuid = ""; + # Make sure that all active databases are in the host's table. 
If they're + # not, we're still early in setup. To do this, we create an array of hosts + # and then query both/all DBs to ensure they all have all hosts. + my $all_in_hosts = 1; + my $db_hosts = []; foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }}); - if (not $first_uuid) + push @{$db_hosts}, $uuid; + } + foreach my $db_uuid (@{$db_hosts}) + { + my $query = "SELECT COUNT(*) FROM hosts WHERE host_uuid = ".$anvil->Database->quote($db_uuid).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:db_uuid' => $db_uuid, + 's2:query' => $query, + }}); + foreach my $host_uuid (@{$db_hosts}) { - $first_uuid = $uuid; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }}); - - # Skip the first UUID so it doesn't evaluate for shutdown. - next; - } - elsif ($uuid eq $host_uuid) - { - # This won't return until we're down. - $anvil->Database->shutdown({debug => 2}); -=cut - ### TODO: We need to have a way to tell clients to disconnect - ### and then shutdown cleanly. This "Wait for an hour" - ### is a kludge. - # This is us, Have we been up for at least an hour? - my $uptime = $anvil->Get->uptime(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }}); - if ($uptime > 3600) + my $count = $anvil->Database->query({debug => 2, uuid => $db_uuid, query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:host_uuid' => $host_uuid, + 's2:db_uuid' => $db_uuid, + 's2:count' => $count, + }}); + if (not $count) { - # backup and shut down. 
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"}); - - # Switch the read_uuid and then close - $anvil->data->{sys}{database}{read_uuid} = $first_uuid; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid}, + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0143", variables => { + db_uuid => $db_uuid, + host_uuid => $host_uuid, }}); - # Disconnect - $anvil->data->{cache}{database_handle}{$uuid}->disconnect; - delete $anvil->data->{cache}{database_handle}{$uuid}; - - # Create a backup, this is useful also for setting - # the mtime of the last time we were up. - my $dump_file = $anvil->Database->backup_database({debug => 3}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }}); - - # Stop the daemon - my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }}); - if ($return_code eq "0") - { - # Stopped the daemon. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); - } + $all_in_hosts = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_in_hosts => $all_in_hosts }}); + } + } + } + + # Sort by UUID, skip the first, and see if we're one of the others. 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_in_hosts => $all_in_hosts }}); + if ($all_in_hosts) + { + my $first_uuid = ""; + foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }}); + if (not $first_uuid) + { + $first_uuid = $uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }}); + + # Skip the first UUID so it doesn't evaluate for + # shutdown. + next; + } + elsif ($uuid eq $host_uuid) + { + # This won't return until we're down. + $anvil->Database->shutdown({debug => 2}); } -=cut } } } - # If we're the active database, dump out database out and rsync it to our peers. + # If we're the active database, dump our database out and rsync it to our peers. my $peers = keys %{$anvil->data->{database}}; my $connections = $anvil->data->{sys}{database}{connections}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { From 422d248cbef401dabe7f1e972f642368c997c962 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 15 Mar 2022 23:42:31 -0400 Subject: [PATCH 5/7] * Updated Database->insert_or_update_states() to not actually record unless the state_host_uuid exists in all available databases. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 42 +++++++++++++++++++++++++++++++++++++++++ share/words.xml | 1 + 2 files changed, 43 insertions(+) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 5bf1a5d9..1ce8fa02 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -12226,6 +12226,48 @@ sub insert_or_update_states return(""); } + # It's possible during initialization that a state could be set before the host is in the database's + # hosts table. This prevents that condition from causing a problem. 
+ my $hosts_ok = 1; + my $db_uuids = []; + my $query = "SELECT COUNT(*) FROM hosts WHERE host_uuid = ".$anvil->Database->quote($state_host_uuid).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + if ($uuid) + { + push @{$db_uuids}, $uuid; + } + else + { + foreach my $db_uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) + { + push @{$db_uuids}, $db_uuid; + } + } + foreach my $db_uuid (@{$db_uuids}) + { + my $count = $anvil->Database->query({debug => 2, uuid => $db_uuid, query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's2:db_uuid' => $db_uuid, + 's2:count' => $count, + }}); + if (not $count) + { + $hosts_ok = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts_ok => $hosts_ok }}); + + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0144", variables => { + state_info => $state_name." -> ".$state_note, + db_uuid => $db_uuid, + host_uuid => $state_host_uuid, + }}); + } + } + if (not $hosts_ok) + { + # Don't save. + return(""); + } + # If we don't have a UUID, see if we can find one for the given state server name. if (not $state_uuid) { diff --git a/share/words.xml b/share/words.xml index bfc3fe76..485e5c96 100644 --- a/share/words.xml +++ b/share/words.xml @@ -3132,6 +3132,7 @@ We will sleep a bit and try again. [ Warning ] - We waited for: [#!variable!wait_time!#] seconds for all users of the local database to exit. Giving up waiting and taking the database down now. [ Warning ] - The command: [#!variable!command!#] is still using our database. [ Warning ] - While evaluating database shutdown, the host UUID: [#!variable!host_uuid!#] was not yet found in the database on host: [#!variable!db_uuid!#]. DB shutdown will not happen until all hosts are in all DBs. 
+ [ Warning ] - While preparing to record the state: [#!variable!state_info!#], the host UUID: [#!variable!host_uuid!#] was not yet found in the database on host: [#!variable!db_uuid!#]. NOT recording the state! From edf51adaecf285f51d210e5da62b6f0991783149 Mon Sep 17 00:00:00 2001 From: Digimer Date: Wed, 16 Mar 2022 00:35:26 -0400 Subject: [PATCH 6/7] * Changed 'anvil-manage-power' to no longer set the job progress to 50 prior to calling a reboot. It now sets to 100 immediately. Also reduced the uptime timer to five minutes from ten. * Updated striker-auto-initialize-all() to reconnect to DBs during waits to better detect when a DB is marked as offline. Signed-off-by: Digimer --- tools/anvil-daemon | 2 ++ tools/anvil-manage-power | 28 ++++++++++++++-------------- tools/striker-auto-initialize-all | 22 +++++++++++++++++++++- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 69fd890a..edac8a39 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -1281,6 +1281,8 @@ AND }}); } + ### TODO: This shouldn't be needed anymore. anvil-manage-power doesn't set the progress to '50' prior + ### to reboot anymore. # If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was # set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to # Lisa Seelye (@thedoh) for this idea! diff --git a/tools/anvil-manage-power b/tools/anvil-manage-power index 03b5b1fe..115075fb 100755 --- a/tools/anvil-manage-power +++ b/tools/anvil-manage-power @@ -188,7 +188,7 @@ sub do_poweroff my ($anvil, $task) = @_; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { task => $task }}); - # We'll wait until the system has at least 10 minutes of uptime, unless '--no-wait' was given. + # We'll wait until the system has at least 5 minutes of uptime, unless '--no-wait' was given. my $uptime = $anvil->data->{switches}{'no-wait'} ? 
0 : $anvil->Get->uptime; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::no-wait" => $anvil->data->{switches}{'no-delay'}, @@ -196,17 +196,16 @@ sub do_poweroff }}); my $say_task = $task eq "poweroff" ? "message_0062" : "message_0063"; - my $percent = $task eq "poweroff" ? 100 : 50; print $anvil->Words->string({key => $say_task})."\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => $say_task}); # To minimize the trouble of a problem where the reboot needed flag isn't cleared, and so the system # wants to repeatedly reboot, we need to add a delay to not let anvil-daemon ask us to # reboot/power-off until the system uptime is more than ten minutes. - if (($uptime) && ($uptime < 600)) + if (($uptime) && ($uptime < 300)) { # We'll wait until the system has been running for ten minutes. - my $difference = 600 - $uptime; + my $difference = 300 - $uptime; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0224", variables => { task => $task eq "poweroff" ? "#!string!log_0225!#" : "#!string!log_0226!#", difference => $difference, @@ -235,16 +234,6 @@ sub do_poweroff $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); } - if ($job_uuid) - { - $anvil->Job->update_progress({ - debug => 2, - progress => $percent, - message => $say_task, - job_uuid => $job_uuid, - }); - } - # Make sure the 'reboot needed' flag is set. When 'anvil-daemon' starts, it will use this to confirm # that it is starting post-reboot and clear it. my $say_reason = $task eq "poweroff" ? "log_0689" : "log_0688"; @@ -259,6 +248,17 @@ sub do_poweroff host_status => $task eq "poweroff" ? "rebooting" : "stopping", }); + # If we have a job UUID, mark that we're done. 
+ if ($job_uuid) + { + $anvil->Job->update_progress({ + debug => 2, + progress => 100, + message => $say_task, + job_uuid => $job_uuid, + }); + } + # Now do the deed. my $shell_call = $anvil->data->{path}{exe}{systemctl}." ".$task; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); diff --git a/tools/striker-auto-initialize-all b/tools/striker-auto-initialize-all index acaf3356..5eb32e8e 100755 --- a/tools/striker-auto-initialize-all +++ b/tools/striker-auto-initialize-all @@ -406,7 +406,12 @@ sub configure_machine_networks if ($waiting) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0269"}); - sleep 10 + + # Disconnect and reconnect in case a DB goes offline. + $anvil->Database->disconnect(); + sleep 10; + $anvil->refresh(); + $anvil->Database->connect(); } } @@ -547,7 +552,12 @@ sub run_manifests { # Wait a bit and check again. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0262"}); + + # Disconnect and reconnect in case a DB goes offline. + $anvil->Database->disconnect(); sleep 10; + $anvil->refresh(); + $anvil->Database->connect(); } else { @@ -805,7 +815,12 @@ sub initialize_machines if ($waiting) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0255"}); + + # Disconnect and reconnect in case a DB goes offline. + $anvil->Database->disconnect(); sleep 10; + $anvil->refresh(); + $anvil->Database->connect(); } } @@ -1371,7 +1386,12 @@ fi; { # Wait 30 seconds and try again $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0230", variables => { number => $striker_number }}); + + # Disconnect and reconnect in case a DB goes offline. 
+ $anvil->Database->disconnect(); sleep 30; + $anvil->refresh(); + $anvil->Database->connect(); } } } From 8fbf5940027c68dbecdcfc4b5ffff72cc098e21a Mon Sep 17 00:00:00 2001 From: Digimer Date: Wed, 16 Mar 2022 13:59:21 -0400 Subject: [PATCH 7/7] Updated striker-prep-database to stop -> start postgres post-configure, and to connect -> disconnect to run the schema load logic. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 4 ++-- tools/striker-prep-database | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 1ce8fa02..365c1689 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -1646,7 +1646,7 @@ sub connect die; } - # If the '$test_table' isn't the same as 'sys::database::test_table', see if the core schema needs loading first. + # Check to see if the schema needs to be loaded. if ($test_table ne $anvil->data->{sys}{database}{test_table}) { my $query = "SELECT COUNT(*) FROM pg_catalog.pg_tables WHERE tablename=".$anvil->Database->quote($anvil->data->{defaults}{sql}{test_table})." AND schemaname='public';"; @@ -1664,7 +1664,7 @@ sub connect } } - # Now that I have connected, see if my 'hosts' table exists. + # Now that I have connected, see if the 'test_table' exists. $query = "SELECT COUNT(*) FROM pg_catalog.pg_tables WHERE tablename=".$anvil->Database->quote($test_table)." AND schemaname='public';"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); diff --git a/tools/striker-prep-database b/tools/striker-prep-database index 25e99c2a..dfa949c0 100755 --- a/tools/striker-prep-database +++ b/tools/striker-prep-database @@ -457,6 +457,18 @@ if ($local_uuid) } } + # In some cases, the database won't allow connections to the admin user. To deal with this, we'll + # call stop->start on the daemon (reload doesn't fix it). 
+ my $return_code = $anvil->System->stop_daemon({daemon => "postgresql"}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }}); + + $return_code = $anvil->System->start_daemon({daemon => "postgresql"}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }}); + + # Connect and then disconnect from the database. This will trigger the schema load if needed. + $anvil->Database->connect(); + $anvil->Database->disconnect(); + ##################################################################################################### # NOTE: Below here is stuff that is for general setup. If it grows, we'll have to rename this tool. # #####################################################################################################