* Reworked how databases are stopped; this is now handled in anvil-daemon. This way, initial starts will still do traditional resyncs, then shut down. This should allow the best of both worlds, where data is not lost on striker start/stop or loss/recovery, but the system otherwise operates normally without delays.

* Updated Database->archive_database() to return the full path to the dump file.
* Disabled enabling the postgresql daemon.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 3 years ago
parent 922899ea78
commit 72b17ff1f9
  1. 2
      Anvil/Tools.pm
  2. 132
      Anvil/Tools/Database.pm
  3. 2
      Anvil/Tools/System.pm
  4. 7
      share/words.xml
  5. 106
      tools/anvil-daemon
  6. 6
      tools/striker-prep-database

@ -1176,7 +1176,7 @@ sub _set_paths
### NOTE: When System->manage_firewall() is done, search for and replace all
### instances where iptables is called and replace with firewall-cmd
### calls
iptables => "/usr/sbin/iptables".
iptables => "/usr/sbin/iptables",
'iptables-save' => "/usr/sbin/iptables-save",
journalctl => "/usr/bin/journalctl",
logger => "/usr/bin/logger",

@ -306,7 +306,7 @@ sub archive_database
This backs up the database to the C<< path::directories::pgsql >> directory as the file name C<< anvil_pg_dump.<host_uuid>.out >>.
If the backup is successful, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned.
If the backup is successful, the full path to the backup file is returned. If there is a problem, C<< !!error!! >> is returned.
B<< Note >>: This method must be called by the root user.
@ -333,6 +333,7 @@ sub backup_database
my $start_time = time;
my $dump_file = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$anvil->Get->host_uuid().".out";
$dump_file =~ s/\/\//\//g;
my $dump_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." ".$anvil->data->{sys}{database}{name}." > ".$dump_file."\"";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
dump_file => $dump_file,
@ -354,9 +355,9 @@ sub backup_database
}});
# Clear the out file.
if (-e $dump_path)
if (-e $dump_file)
{
unlink $dump_path;
unlink $dump_file;
}
return('!!error!!');
}
@ -373,7 +374,7 @@ sub backup_database
size_bytes => $size_bytes,
}});
return(0);
return($dump_file);
}
=head2 check_file_locations
@ -823,9 +824,9 @@ sub configure_pgsql
$initialized = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { initialized => $initialized }});
# Enable it on boot. When two or more are available, one will shut down.
my $return_code = $anvil->System->enable_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }});
### NOTE: We no longer enable postgres on boot. When the first call is made to
### Database->connect on a striker, and no databases are available, it will
### start up the local daemon then.
}
}
@ -1161,10 +1162,6 @@ This module will return the number of databases that were successfully connected
Parameters;
=head3 all (optional, default '0')
If this is set, all available databases will be connected to. This will also allow resync's to run as needed.
=head3 check_for_resync (optional, default 0)
If set to C<< 1 >>, and there are 2 or more databases available, a check will be made to see if the databases need to be resync'ed or not. This is also set if the command line switch C<< --resync-db >> is used.
@ -1249,7 +1246,6 @@ sub connect
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->connect()" }});
my $all = defined $parameter->{all} ? $parameter->{all} : 0;
my $check_if_configured = defined $parameter->{check_if_configured} ? $parameter->{check_if_configured} : 0;
my $db_uuid = defined $parameter->{db_uuid} ? $parameter->{db_uuid} : "";
my $check_for_resync = defined $parameter->{check_for_resync} ? $parameter->{check_for_resync} : 0;
@ -1261,7 +1257,6 @@ sub connect
my $tables = defined $parameter->{tables} ? $parameter->{tables} : "";
my $test_table = defined $parameter->{test_table} ? $parameter->{test_table} : $anvil->data->{sys}{database}{test_table};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
all => $all,
check_if_configured => $check_if_configured,
db_uuid => $db_uuid,
check_for_resync => $check_for_resync,
@ -1319,14 +1314,6 @@ sub connect
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_for_resync => $check_for_resync }});
}
### NOTE: Experimental database configuration below
# If we're a striker, sort by UUID and the first one (doesn't matter which it actually is) becomes
# "primary". If this is a Striker and we connected to another Striker, shut down our database. Later,
# if no connections were found and this is a Striker, we'll start our database up (loading from our
# peer's last dump they sent us). If this is a node or DR host, we stop connecting after our first
# successful connections.
$anvil->data->{cache}{active_db} = "";
# Now set up our however-many connections
my $seen_connections = [];
my $failed_connections = [];
@ -1709,46 +1696,23 @@ sub connect
next;
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"sys::database::connections" => $anvil->data->{sys}{database}{connections},
}});
if (($anvil->data->{sys}{database}{connections}) && (not $all))
{
# Stop connecting here.
$anvil->data->{cache}{active_db} = $uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cache::active_db" => $anvil->data->{cache}{active_db},
}});
last;
}
}
# If we're not connecting to all databases, perform shutdown / backup / daemon management logic.
if (not $all)
{
# If we're a striker and no connections were found, start our database.
my $local_host_type = $anvil->Get->host_type();
my $local_host_uuid = $anvil->Get->host_uuid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
local_host_type => $local_host_type,
local_host_uuid => $local_host_uuid,
}});
# Are we a Striker?
if ($local_host_type eq "striker")
{
# If we didn't connect to any database, it's possible/likely our peer is down and we need to
# start our local postgres database server.
if ((not $anvil->data->{sys}{database}{connections}) && (not $running))
if (($local_host_type eq "striker") && (not $anvil->data->{sys}{database}{connections}))
{
# Tell the user we're going to try to load and start.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "log_0650"});
# Look for pgdumps. "Youngest" is the one with the highest mtime.
my $use_dump = "";
my $backup_age = 0;
my $youngest_dump = 0;
my $directory = $anvil->data->{path}{directories}{pgsql};
my $db_name = $anvil->data->{sys}{database}{name};
my $dump_files = [];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { directory => $directory }});
local(*DIRECTORY);
@ -1763,7 +1727,7 @@ sub connect
file => $file,
full_path => $full_path,
}});
if ($file =~ /${db_name}_db_dump\.(.*).sql/)
if ($file =~ /\Q${db_name}_db_dump\.(.*).sql/)
{
$db_dump_uuid = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { directory => $directory }});
@ -1771,20 +1735,32 @@ sub connect
# Is this one of our own dumps?
if ($db_dump_uuid eq $local_host_uuid)
{
# Ignore it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0653", variables => { full_path => $full_path });
# How recent is it?
$anvil->Storage->get_file_stats({debug => $debug, file => $full_path});
my $mtime = $anvil->data->{file_stat}{$full_path}{modified_time};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { mtime => $mtime }});
if ($mtime > $backup_age)
{
$backup_age = $mtime;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { backup_age => $backup_age }});
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0653", variables => { full_path => $full_path }});
next;
}
push @{$dump_files}, $full_path;
# Is this a database we're configured to use?
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0651", variables => { full_path => $full_path });
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0651", variables => { full_path => $full_path }});
if ((not exists $anvil->data->{database}{$db_dump_uuid}) or (not $anvil->data->{database}{$db_dump_uuid}{host}))
{
# Not a database we're peered with anymore, ignore it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0652", variables => {
full_path => $full_path,
host_uuid => $db_dump_uuid,
});
}});
next;
}
}
@ -1795,7 +1771,7 @@ sub connect
# What's the mtime on this file?
$anvil->Storage->get_file_stats({debug => $debug, file => $full_path});
my $mtime = $anvil->data->{file_stat}{$file_path}{modified_time};
my $mtime = $anvil->data->{file_stat}{$full_path}{modified_time};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { directory => $directory }});
if ($mtime > $youngest_dump)
@ -1810,14 +1786,19 @@ sub connect
}
}
# Did I find a dump to load?
if ($use_dump)
# Did I find a dump to load that's newer than my most recent backup?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
use_dump => $use_dump,
youngest_dump => $youngest_dump,
backup_age => $backup_age,
}});
if (($use_dump) && ($youngest_dump > $backup_age))
{
# Yup! This will start the database, if needed.
my $file_size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$file_path}{size}});
my $file_size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$file_path}{size}});
my $file_size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$use_dump}{size}});
my $file_size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$use_dump}{size}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0656", variables => {
file => $file_path,
file => $use_dump,
size => $file_size,
size_bytes => $file_size_bytes,
}});
@ -1825,17 +1806,30 @@ sub connect
my $problem = $anvil->Database->load_database({
debug => $debug,
backup => 1,
load_file => $full_path,
load_file => $use_dump,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
### TODO: Loop through the directory and delete all dumps from other
### Strikers. This way we won't roll back in time if we restart
### and there's been no new dumps made.
if ($problem)
{
# Failed, delete the file we tried to load.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "err", key => "error_0355", variables => { file => $use_dump }});
unlink $use_dump;
}
else
{
# Success! Delete all backups we found so we don't reload
# them in the future.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0657"});
foreach my $full_path (@{$dump_files})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0589", variables => { file => $full_path }});
unlink $full_path;
}
}
}
# Check if the daemon is running
my $running = $anvil->System->check_daemon({daemon => "NetworkManager"});
my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }});
if (not $running)
{
@ -1854,8 +1848,6 @@ sub connect
}
}
}
}
}
my $total = tv_interval ($start_time, [gettimeofday]);
#print "Total runtime: [".$total."]\n";
@ -14112,9 +14104,9 @@ sub load_database
if ($backup)
{
# Backup the database.
my $problem = $anvil->Database->backup_database({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
if ($problem)
my $dump_file = $anvil->Database->backup_database({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { dump_file => $dump_file }});
if ($dump_file eq "!!error!!")
{
# Drop the firewall block
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});

@ -4444,7 +4444,7 @@ sub start_daemon
This method stops a daemon. The return code from the stop request will be returned.
If the return code for the stop command wasn't read, C<< !!error!! >> is returned.
The return code for the stop command is returned.
Parameters;

@ -500,6 +500,7 @@ The output, if any, was;
#!variable!output!#
====
</key>
<key name="error_0355">Failed to load the database file: [#!variable!file!#]. Deleting it so it's not considered in the next load attempt.</key>
<!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which lines are translatable -->
@ -1343,7 +1344,7 @@ Connecting to Database with configuration ID: [#!variable!uuid!#]
<key name="log_0061"><![CDATA[[ Error ] - The method Get->users_home() was asked to find the home directory for the user: [#!variable!user!#], but was unable to do so.]]></key>
<key name="log_0062">SSH session opened without a password to: [#!variable!target!#].</key>
<key name="log_0063"><![CDATA[The database: [#!variable!host!# -> #!variable!name!#] with the UUID: [#!variable!uuid!#] did not respond to pings and 'database::#!variable!uuid!#::ping' is not set to '0' in '#!data!path::configs::anvil.conf!#', skipping it.]]></key>
<key name="log_0064">[ Note ] - The database: [#!variable!name!#] on host: [#!variable!host!#] with UUID: [#!variable!uuid!#] can not be used, skipping it.</key>
<key name="log_0064">[ Note ] - The database: [#!variable!name!#] on host: [#!variable!host!#] with UUID: [#!variable!uuid!#] is not available, skipping it.</key>
<key name="log_0065">
The database connection error was:
----------
@ -2022,6 +2023,10 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0654">The database was dumped to: [#!variable!file!#] in: [#!variable!took!#] second(s). The size of the dump file is: [#!variable!size!#] (#!variable!size_bytes!# bytes).</key>
<key name="log_0655">The database was loaded successfully from the file: [#!variable!file!#] in: [#!variable!took!#] second(s)!</key>
<key name="log_0656">No databases were available, so we will become primary after loading: [#!variable!file!#], which is: [#!variable!size!#] (#!variable!size_bytes!# bytes). Please be patient, this could take a moment.</key>
<key name="log_0657">The database was loaded, clear it and other DB dumps out now so that they don't get reloaded again in the future.</key>
<key name="log_0658">Sync'ed the file: [#!variable!file!#] to the peer Striker: [#!variable!host_name!#]. The sync took: [#!variable!took!#] seconds, and the file was: [#!variable!size!#] (#!variable!size_bytes!# bytes).</key>
<key name="log_0659">We're going to shut down our database. Creating a backup first.</key>
<key name="log_0660">Stopped the postgresql daemon as a peer is currently primary.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -1,4 +1,4 @@
# #!/usr/bin/perl
#!/usr/bin/perl
#
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster
# nodes and DR hosts.
@ -509,18 +509,112 @@ sub handle_periodic_tasks
# Now check to see if it's time to run less frequent tasks.
if ($now_time >= $anvil->data->{timing}{next_ten_minute_check})
{
if ($type eq "striker")
my $host_type = $anvil->Get->host_type();
my $host_uuid = $anvil->Get->host_uuid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
host_type => $host_type,
host_uuid => $host_uuid,
}});
# Are we a Striker and are there two or more connections? If so, evaluate if we should shut down our
# database.
if ($host_type eq "striker")
{
if ($anvil->data->{sys}{database}{connections} > 1)
{
# Sort by UUID, skip the first, and see if we're one of the others.
my $first_uuid = "";
foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uuid => $uuid }});
if (not $first_uuid)
{
$first_uuid = $uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { first_uuid => $first_uuid }});
}
elsif ($uuid eq $host_uuid)
{
# This is us, backup and shut down.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"});
# Switch the read_uuid and then close
$anvil->data->{sys}{database}{read_uuid} = $first_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }});
# Disconnect
$anvil->data->{cache}{database_handle}{$uuid}->disconnect;
delete $anvil->data->{cache}{database_handle}{$uuid};
# Create a backup, this is useful also for setting the mtime of the last time
# we were up.
my $dump_file = $anvil->Database->backup_database({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
# Stop the daemon
my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
if ($return_code eq "0")
{
# Stopped the daemon.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"});
}
}
}
}
# If we're the active database, dump our database out and rsync it to our peers.
my $peers = keys $anvil->data->{database};
my $host_uuid = $anvil->Get->host_uuid;
my $peers = keys %{$anvil->data->{database}};
my $connections = $anvil->data->{sys}{database}{connections};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peers => $peers,
host_uuid => $host_uuid,
connections => $connections,
}});
if ($anvil->data->{sys}{database}{local_uuid} eq $host_uuid)
if (exists $anvil->data->{cache}{database_handle}{$host_uuid})
{
# Verify that the database is up.
my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }});
if ($running)
{
# Backup our DB.
my $dump_file = $anvil->Database->backup_database({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
# Now rsync it to our peer(s)
foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
{
next if $this_host_uuid eq $host_uuid;
my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/";
my $password = $anvil->data->{database}{$this_host_uuid}{password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
destination => $destination,
password => $anvil->Log->is_secure($password),
}});
my $start_time = time;
my $failed = $anvil->Storage->rsync({
debug => 2,
destination => $destination,
password => $password,
source => $dump_file,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
my $rsync_time = time - $start_time;
my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
my $target_name = $anvil->Get->host_name_from_uuid({debug => 2, host_uuid => $this_host_uuid});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => {
file => $dump_file,
host_name => $target_name,
took => $rsync_time,
size => $size,
size_bytes => $size_bytes,
}});
}
}
}
}
}

@ -199,9 +199,9 @@ if ($local_uuid)
# Started the daemon.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"});
# Make sure it is enabled on boot.
my $return_code = $anvil->System->enable_daemon({debug => 2, daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
### NOTE: We no longer enable postgres on boot. When the first call is made to
### Database->connect on a striker, and no databases are available, it will
### start up the local daemon then.
}
else
{

Loading…
Cancel
Save