diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index 97a78bcc..c51fd5d7 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -874,10 +874,14 @@ sub _set_defaults locking_reap_age => 300, log_transactions => 0, maximum_batch_size => 25000, + # NOTE: Do NOT change this unless you are certain all machines that use this host + # have been likewise updated! name => "anvil", read_uuid => "", test_table => "hosts", timestamp => "", + # NOTE: Do NOT change this unless you are certain all machines that use this host + # have been likewise updated! user => "admin", use_handle => "", }, @@ -1077,6 +1081,7 @@ sub _set_paths html => "/var/www/html", ifcfg => "/etc/sysconfig/network-scripts", journald => "/var/log/journal", + pgsql => "/var/lib/pgsql/", resource_status => "/sys/kernel/debug/drbd/resources", scan_agents => "/usr/sbin/scancore-agents", shared => { @@ -1146,6 +1151,7 @@ sub _set_paths dnf => "/usr/bin/dnf", drbdadm => "/usr/sbin/drbdadm", drbdsetup => "/usr/sbin/drbdsetup", + dropdb => "/usr/bin/dropdb", echo => "/usr/bin/echo", ethtool => "/usr/sbin/ethtool", expect => "/usr/bin/expect", @@ -1167,6 +1173,10 @@ sub _set_paths ip => "/usr/sbin/ip", 'ipmi-oem' => "/usr/sbin/ipmi-oem", ipmitool => "/usr/bin/ipmitool", + ### NOTE: When System->manage_firewall() is done, search for and replace all + ### instances where iptables is called and replace with firewall-cmd + ### calls + iptables => "/usr/sbin/iptables",
'iptables-save' => "/usr/sbin/iptables-save", journalctl => "/usr/bin/journalctl", logger => "/usr/bin/logger", @@ -1196,6 +1206,7 @@ sub _set_paths pcs => "/usr/sbin/pcs", perccli64 => "/opt/MegaRAID/perccli/perccli64", ping => "/usr/bin/ping", + pg_dump => "/usr/bin/pg_dump", pgrep => "/usr/bin/pgrep", ps => "/usr/bin/ps", psql => "/usr/bin/psql", diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 6eb556ac..08ff7748 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -17,6 +17,7 @@ my $THIS_FILE = "Database.pm"; ### Methods; # archive_database +# backup_database # check_file_locations # check_lock_age # check_for_schema @@ -78,6 +79,7 @@ my $THIS_FILE = "Database.pm"; # insert_or_update_upses # insert_or_update_users # insert_or_update_variables +# load_database # lock_file # locking # manage_anvil_conf @@ -300,6 +302,80 @@ sub archive_database }
+=head2 backup_database
+
+This backs up the database to the C<< path::directories::pgsql >> directory as the file name C<< <database_name>_db_dump.<host_uuid>.out >> (C<< anvil_db_dump.<host_uuid>.out >> by default). The dump is made by running C<< pg_dump >> as the C<< postgres >> user.
+
+If the backup is successful, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned and any partially written dump file is removed.
+
+B<< Note >>: This method must be called by the root user.
+
+B<< Note >>: If C<< sys::database::name >> has been changed, the dump file name will match.
+
+This method takes no parameters.
+
+=cut
+sub backup_database
+{
+	my $self      = shift;
+	my $parameter = shift;
+	my $anvil     = $self->parent;
+	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->backup_database()" }});
+
+	# Only the root user can do this (we need to 'su' to the postgres user).
+	if (($< != 0) && ($> != 0))
+	{
+		# Not root
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0352"});
+		return('!!error!!');
+	}
+
+	my $start_time = time;
+	my $dump_file  = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$anvil->Get->host_uuid().".out";
+	my $dump_call  = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." ".$anvil->data->{sys}{database}{name}." > ".$dump_file."\"";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		dump_file => $dump_file,
+		dump_call => $dump_call,
+	}});
+	my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $dump_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+
+	if ($return_code)
+	{
+		# Dump failed.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0351", variables => {
+			shell_call  => $dump_call,
+			return_code => $return_code,
+			output      => $output,
+		}});
+
+		# Remove the (possibly partial) dump file so it can never be mistaken for a good backup.
+		if (-e $dump_file)
+		{
+			unlink $dump_file;
+		}
+		return('!!error!!');
+	}
+
+	# Record the stats (time taken and the size of the dump file).
+	$anvil->Storage->get_file_stats({debug => $debug, file_path => $dump_file});
+	my $dump_time  = time - $start_time;
+	my $size       = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
+	my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0654", variables => {
+		file       => $dump_file,
+		took       => $dump_time,
+		size       => $size,
+		size_bytes => $size_bytes,
+	}});
+
+	return(0);
+}
+
 =head2 check_file_locations This method checks to see that there is a corresponding entry in C<< file_locations >> for all Anvil! systems and files in the database. Any that are found to be missing will be set to C<< file_location_active >> -> c<< false >>. @@ -722,35 +798,34 @@ sub configure_pgsql return(1); } - # First, is it running? - my $running = $anvil->System->check_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); + # First, is it running and is it initialized? + my $initialized = 0; + my $running = $anvil->System->check_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { running => $running }}); - - if (not $running) + if ((not $running) && (not -e $anvil->data->{path}{configs}{'pg_hba.conf'})) { - # Do we need to initialize the databae? + # Initialize. Record that we did so, so that we know to start the daemon. + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{'postgresql-setup'}." initdb", source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { output => $output, return_code => $return_code }}); + + # Did it succeed?
if (not -e $anvil->data->{path}{configs}{'pg_hba.conf'}) { - # Initialize. - my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{'postgresql-setup'}." initdb", source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { output => $output, return_code => $return_code }}); + # Failed... + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0050"}); + return("!!error!!"); + } + else + { + # Initialized! + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0055"}); - # Did it succeed? - if (not -e $anvil->data->{path}{configs}{'pg_hba.conf'}) - { - # Failed... - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0050"}); - return("!!error!!"); - } - else - { - # Initialized! - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0055"}); - - # Enable it on boot. - my $return_code = $anvil->System->enable_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); - } + $initialized = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { initialized => $initialized }}); + + # Enable it on boot. When two or more are available, one will shut down. + my $return_code = $anvil->System->enable_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); } } @@ -859,19 +934,23 @@ sub configure_pgsql # Start or restart the daemon? if (not $running) { - # Start the daemon. 
- my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); - if ($return_code eq "0") - { - # Started the daemon. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); - } - else + # Did we initialize? + if ($initialized) { - # Failed to start - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0094"}); - return("!!error!!"); + # Start the daemon. + my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); + if ($return_code eq "0") + { + # Started the daemon. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); + } + else + { + # Failed to start + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0094"}); + return("!!error!!"); + } } } elsif (($update_postgresql_file) or ($update_pg_hba_file)) @@ -1082,6 +1161,10 @@ This module will return the number of databases that were successfully connected Parameters; +=head3 all (optional, default '0') + +If this is set, all available databases will be connected to. This will also allow resync's to run as needed. + =head3 check_for_resync (optional, default 0) If set to C<< 1 >>, and there are 2 or more databases available, a check will be make to see if the databases need to be resync'ed or not. This is also set if the command line switch C<< --resync-db >> is used. @@ -1104,6 +1187,10 @@ If set, the connection will be made only to the database server matching the UUI If set to C<< 1 >>, no attempt to ping a target before connection will happen, even if C<< database::::ping = 1 >> is set. 
+=head3 retry (optional, default '0') + +This method will try to recall itself if this is a Striker and it found no available databases, and so became primary. If this is set, it won't try to become primary a second time. + =head3 sensitive (optional, default '0') If set to C<< 1 >>, the caller is considered time sensitive and most checks are skipped. This is used when a call must respond as quickly as possible. @@ -1162,20 +1249,24 @@ sub connect my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->connect()" }}); + my $all = defined $parameter->{all} ? $parameter->{all} : 0; my $check_if_configured = defined $parameter->{check_if_configured} ? $parameter->{check_if_configured} : 0; my $db_uuid = defined $parameter->{db_uuid} ? $parameter->{db_uuid} : ""; - my $no_ping = defined $parameter->{no_ping} ? $parameter->{no_ping} : 0; my $check_for_resync = defined $parameter->{check_for_resync} ? $parameter->{check_for_resync} : 0; + my $no_ping = defined $parameter->{no_ping} ? $parameter->{no_ping} : 0; + my $retry = defined $parameter->{retry} ? $parameter->{retry} : 0; my $sensitive = defined $parameter->{sensitive} ? $parameter->{sensitive} : 0; my $source = defined $parameter->{source} ? $parameter->{source} : "core"; my $sql_file = defined $parameter->{sql_file} ? $parameter->{sql_file} : $anvil->data->{path}{sql}{'anvil.sql'}; my $tables = defined $parameter->{tables} ? $parameter->{tables} : ""; my $test_table = defined $parameter->{test_table} ? 
$parameter->{test_table} : $anvil->data->{sys}{database}{test_table}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + all => $all, check_if_configured => $check_if_configured, db_uuid => $db_uuid, - no_ping => $no_ping, check_for_resync => $check_for_resync, + no_ping => $no_ping, + retry => $retry, sensitive => $sensitive, source => $source, sql_file => $sql_file, @@ -1228,6 +1319,14 @@ sub connect $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_for_resync => $check_for_resync }}); } + ### NOTE: Experimental database configuration below + # If we're a striker, sort by UUID and the first one (doesn't matter which it actually is) becomes + # "primary". If this is a Striker and we connected to another Striker, shut down our database. Later, + # if no connections were found and this is a Striker, we'll start our database up (loading from our + # peer's last dump they sent us). If this is a node or DR host, we stop connecting after our first + # successful connections. + $anvil->data->{cache}{active_db} = ""; + # Now setup or however-many connections my $seen_connections = []; my $failed_connections = []; @@ -1369,13 +1468,14 @@ sub connect }}); if (not $test) { - # Something went wrong... - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "log_0064", variables => { + # Either the Striker hosting this is down, or it's not primary and stopped its + # database. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, priority => "alert", key => "log_0064", variables => { uuid => $uuid, host => $host, name => $name, }}); - + push @{$failed_connections}, $uuid; my $message_key = "log_0065"; my $variables = { dbi_error => $DBI::errstr }; @@ -1602,12 +1702,159 @@ sub connect target_version => $remote_schema_version, }}); - # Delete the information about this database. 
We'll try again on nexy + # Delete the information about this database. We'll try again on next # ->connect(). delete $anvil->data->{database}{$uuid}; + $anvil->data->{sys}{database}{connections}--; next; } } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "sys::database::connections" => $anvil->data->{sys}{database}{connections}, + }}); + if (($anvil->data->{sys}{database}{connections}) && (not $all)) + { + # Stop connecting here. + $anvil->data->{cache}{active_db} = $uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cache::active_db" => $anvil->data->{cache}{active_db}, + }}); + last; + } + } + + # If we're not connecting to all databases, perform shutdown / backup / daemon management logic. + if (not $all) + { + my $local_host_type = $anvil->Get->host_type(); + my $local_host_uuid = $anvil->Get->host_uuid(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + local_host_type => $local_host_type, + local_host_uuid => $local_host_uuid, + }}); + + # Are we a Striker? + if ($local_host_type eq "striker") + { + # If we didn't connect to any database, it's possible/likely our peer is down and we need to + # start our local postgres database server. + if ((not $anvil->data->{sys}{database}{connections}) && (not $running)) + { + # Tell the user we're going to try to load and start. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "log_0650"}); + + # Look for pgdumps. "Youngest" is the one with the highest mtime. 
+				my $use_dump      = "";
+				my $youngest_dump = 0;
+				my $directory     = $anvil->data->{path}{directories}{pgsql};
+				my $db_name       = $anvil->data->{sys}{database}{name};
+
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { directory => $directory }});
+				local(*DIRECTORY);
+				opendir(DIRECTORY, $directory);
+				while(my $file = readdir(DIRECTORY))
+				{
+					next if $file eq ".";
+					next if $file eq "..";
+					my $db_dump_uuid = "";
+					my $full_path    = $directory."/".$file;
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+						file      => $file,
+						full_path => $full_path,
+					}});
+					if ($file =~ /^\Q${db_name}\E_db_dump\.(.+)\.(?:sql|out)$/)	# Database->backup_database() writes '.out' files
+					{
+						$db_dump_uuid = $1;
+						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { db_dump_uuid => $db_dump_uuid }});
+
+						# Is this one of our own dumps?
+						if ($db_dump_uuid eq $local_host_uuid)
+						{
+							# Ignore it.
+							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0653", variables => { full_path => $full_path }});
+							next;
+						}
+
+						# Is this a database we're configured to use?
+						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0651", variables => { full_path => $full_path }});
+						if ((not exists $anvil->data->{database}{$db_dump_uuid}) or (not $anvil->data->{database}{$db_dump_uuid}{host}))
+						{
+							# Not a database we're peered with anymore, ignore it.
+							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0652", variables => {
+								full_path => $full_path,
+								host_uuid => $db_dump_uuid,
+							}});
+							next;
+						}
+					}
+					else
+					{
+						next;
+					}
+
+					# What's the mtime on this file? The stats are stored under 'file_stat::<full_path>'.
+					$anvil->Storage->get_file_stats({debug => $debug, file_path => $full_path});
+					my $mtime = $anvil->data->{file_stat}{$full_path}{modified_time};
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { mtime => $mtime }});
+
+					if ($mtime > $youngest_dump)
+					{
+						# This is the youngest, so far.
+						$youngest_dump = $mtime;
+						$use_dump      = $full_path;
+						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+							youngest_dump => $youngest_dump,
+							full_path     => $full_path,
+						}});
+					}
+				}
+				closedir(DIRECTORY);
+				# Did I find a dump to load?
+				if ($use_dump)
+				{
+					# Yup! This will start the database, if needed. ('$full_path' is out of scope here, use '$use_dump'.)
+					my $file_size       = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$use_dump}{size}});
+					my $file_size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$use_dump}{size}});
+					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0656", variables => {
+						file       => $use_dump,
+						size       => $file_size,
+						size_bytes => $file_size_bytes,
+					}});
+
+					my $problem = $anvil->Database->load_database({
+						debug     => $debug,
+						backup    => 1,
+						load_file => $use_dump,
+					});
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
+
+					### TODO: Loop through the directory and delete all dumps from other
+					###       Strikers. This way we won't roll back in time if we restart
+					###       and there's been no new dumps made.
+ } + + # Check if the daemon is running + my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }}); + if (not $running) + { + my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); + if ($return_code eq "0") + { + # Started the daemon. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); + + # Recall this method. + if (not $retry) + { + $anvil->Database->connect({debug => $debug, retry => 1}); + } + } + } + } + } } my $total = tv_interval ($start_time, [gettimeofday]); @@ -13761,6 +14008,243 @@ WHERE }
+=head2 load_database
+
+This takes a path to an uncompressed SQL database dump file, and loads it into the C<< anvil >> database. During the duration of this operation, remote access to the database will be disabled via an C<< iptables >> reject rule on TCP port 5432!
+
+If necessary, the database server will be started.
+
+If the dump is successfully loaded, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned.
+
+B<< Note >>: This method must be called by the root user.
+
+B<< Note >>: This always and only works on the local database server's C<< sys::database::name >> database (C<< anvil >> by default). The existing database is dropped and recreated before the file is loaded.
+
+Parameters;
+
+=head3 backup (optional, default '1')
+
+This controls whether the data in the existing database is saved to a file prior to the passed-in database file being loaded. Set this to C<< 0 >> to skip the backup.
+
+=head3 load_file (required)
+
+This is the full path to the SQL file to load into the database.
+
+=cut
+sub load_database
+{
+	my $self      = shift;
+	my $parameter = shift;
+	my $anvil     = $self->parent;
+	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->load_database()" }});
+
+	my $backup    = defined $parameter->{backup}    ? $parameter->{backup}    : 1;	# 'defined' so that 'backup => 0' is honoured
+	my $load_file = defined $parameter->{load_file} ? $parameter->{load_file} : "";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		backup    => $backup,
+		load_file => $load_file,
+	}});
+
+	# Only the root user can do this
+	if (($< != 0) && ($> != 0))
+	{
+		# Not root
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0350"});
+		return('!!error!!');
+	}
+
+	# Was a file given, and does it exist? Both checks happen before the firewall or database are touched.
+	if (not $load_file)
+	{
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->load_database()", parameter => "load_file" }});
+		return('!!error!!');
+	}
+	elsif (not -e $load_file)
+	{
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0103", variables => { file => $load_file }});
+		return('!!error!!');
+	}
+
+	my $start_time = time;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { start_time => $start_time }});
+
+	### TODO: Replace this with System->manage_firewall().
+	# Throw up the firewall. Have the open call ready in case we hit an error.
+	my $block_call = $anvil->data->{path}{exe}{iptables}." -I INPUT -p tcp --dport 5432 -j REJECT";
+	my $open_call  = $anvil->data->{path}{exe}{iptables}." -D INPUT -p tcp --dport 5432 -j REJECT";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { block_call => $block_call }});
+	my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $block_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+
+	# Start the database, if needed.
+	my $running = $anvil->System->check_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { running => $running }});
+	if (not $running)
+	{
+		# Start it up.
+		my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }});
+		if ($return_code eq "0")
+		{
+			# Started the daemon.
+			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"});
+		}
+		else
+		{
+			# Failed to start
+			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0094"});
+
+			# Drop the firewall block
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
+			my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $open_call});
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+				output      => $output,
+				return_code => $return_code,
+			}});
+			return("!!error!!");
+		}
+	}
+
+	# Backup, if needed.
+	if ($backup)
+	{
+		# Backup the database.
+		my $problem = $anvil->Database->backup_database({debug => $debug});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
+		if ($problem)
+		{
+			# Drop the firewall block
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
+			my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $open_call});
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+				output      => $output,
+				return_code => $return_code,
+			}});
+			return("!!error!!");
+		}
+	}
+
+	# Drop the existing database.
+	my $drop_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{dropdb}." ".$anvil->data->{sys}{database}{name}."\"";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { drop_call => $drop_call }});
+	$output      = "";
+	$return_code = "";
+	($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $drop_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+	if ($return_code)
+	{
+		# This is a failure, but it could be that the database simply didn't exist (was already
+		# dumped). If that's the case, we'll keep going.
+		my $proceed = 0;
+		if ($output =~ /database ".*?" does not exist/gs)
+		{
+			# proceed.
+			$proceed = 1;
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { proceed => $proceed }});
+		}
+		if (not $proceed)
+		{
+			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0353", variables => {
+				shell_call  => $drop_call,
+				return_code => $return_code,
+				output      => $output,
+			}});
+
+			# Drop the firewall block
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
+			my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $open_call});
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+				output      => $output,
+				return_code => $return_code,
+			}});
+			return('!!error!!');
+		}
+	}
+
+	# Recreate the DB.
+	my $create_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createdb}." --owner ".$anvil->data->{sys}{database}{user}." ".$anvil->data->{sys}{database}{name}."\"";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_call => $create_call }});
+	$output      = "";
+	$return_code = "";
+	($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $create_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+	if ($return_code)
+	{
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0354", variables => {
+			shell_call  => $create_call,
+			return_code => $return_code,
+			output      => $output,
+		}});
+
+		# Drop the firewall block
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
+		my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $open_call});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+			output      => $output,
+			return_code => $return_code,
+		}});
+		return('!!error!!');
+	}
+
+	# Finally, load the database. This uses the 'psql' client ('path::exe::psql') against the recreated database.
+	my $load_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." ".$anvil->data->{sys}{database}{name}." < ".$load_file."\"";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { load_call => $load_call }});
+	$output      = "";
+	$return_code = "";
+	($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $load_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+	if ($return_code)
+	{
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0355", variables => {
+			shell_call  => $load_call,
+			return_code => $return_code,
+			output      => $output,
+		}});
+
+		# Drop the firewall block
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
+		my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $open_call});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+			output      => $output,
+			return_code => $return_code,
+		}});
+		return('!!error!!');
+	}
+
+	# Open the firewall back up
+	$output      = "";
+	$return_code = "";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
+	($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $open_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+
+	# Done!
+	my $took_time = time - $start_time;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0655", variables => {
+		file => $load_file,
+		took => $took_time,
+	}});
+
+	return(0);
+}
+
+
 =head2 lock_file This reads, sets or updates the database lock file timestamp. diff --git a/Anvil/Tools/Storage.pm b/Anvil/Tools/Storage.pm index 3ff0f75a..3915aa43 100644 --- a/Anvil/Tools/Storage.pm +++ b/Anvil/Tools/Storage.pm @@ -20,6 +20,7 @@ my $THIS_FILE = "Storage.pm"; # change_mode # change_owner # check_md5sums +# compress # copy_file # delete_file # find diff --git a/notes b/notes index 5d33a07e..604d9fc3 100644 --- a/notes +++ b/notes @@ -8,51 +8,14 @@ TODO: ============ # Dump -su - postgres -c "pg_dump anvil > /tmp/anvil.out" && mv /tmp/anvil.out /root/ -su - postgres -c "pg_dump --schema-only anvil > /tmp/anvil.out" && mv /tmp/anvil.out /root/ +su - postgres -c "pg_dump anvil > /var/lib/pgsql/anvil.out" +su - postgres -c "pg_dump --schema-only anvil > /var/lib/pgsql/anvil_schema.out" -cp /root/anvil.out /; su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil < /anvil.out" +su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil < /var/lib/pgsql/anvil.out" su postgres -c "psql anvil" ============ -# ScanCore post-scan logic; - -Sole node: -1. Evaluate critical shutdown only, if hosting VMs. -2. If not hosting VMs, load-shed if over-heat / power loss for more than 120 seconds - -Action options; -1. Do nothing -2. Pull servers -3. Shut down (once servers are gone) -4. Shut down (gracefully stop servers) - - -* Peer not available - - Thermal is critical, gracefully shut down.
- - Power is strongest UPS below ten minutes and time on batteries is over 2 minutes, graceful shut down -* Peer available - - If one node is healthier than the other; - - If we're sicker, do nothing until we have no servers - - If we're healthier, after two minutes, pull - - If health is equal; - - Both nodes have servers; - - Decide who can be evacuated fastest, in case load shed needed. - - Both nodes on batteries or in warning temp for more than 2 minutes; - - If we're the designated survivor, pull servers. - - If we're the sacrifice, wait for the servers to be taken off of us, then shut down. - - Peer has servers, we don't - - If thermal warning or both/all UPSes on batter for two minutes+, shut down - - We have servers, peer doesn't. - - Keep running - - - -1.1 - Our peer may pull from us. -2. - Not Hosting Servers -2.1 - - Jenkins; diff --git a/share/words.xml b/share/words.xml index 94948381..10e93f8c 100644 --- a/share/words.xml +++ b/share/words.xml @@ -477,6 +477,29 @@ The output, if any, was - The problematic new config has been saved as: [#!variable!file!#]. - The old config has been restored. Exiting. - The logical volume: [#!variable!lv_path!#] creation failed. Unable to proceed. + Only the root user can load a database file and start the database. + [ Error ] - The 'pg_dump' call to backup the database failed. Expected a return code of '0', but got: [#!variable!return_code!#]. +Full command called: [#!variable!shell_call!#] +The output, if any, was +==== +#!variable!output!# +==== + + Only the root user can backup a database. + [ Error ] - The 'dropdb' call to drop the database failed. Expected a return code of '0', but got: [#!variable!return_code!#]. +Full command called: [#!variable!shell_call!#] +The output, if any, was +==== +#!variable!output!# +==== + + [ Error ] - The 'createdb' call to create the database failed. Expected a return code of '0', but got: [#!variable!return_code!#]. 
+Full command called: [#!variable!shell_call!#] +The output, if any, was; +==== +#!variable!output!# +==== + @@ -1320,7 +1343,7 @@ Connecting to Database with configuration ID: [#!variable!uuid!#] users_home() was asked to find the home directory for the user: [#!variable!user!#], but was unable to do so.]]> SSH session opened without a password to: [#!variable!target!#]. #!variable!name!#] with the UUID: [#!variable!uuid!#] did not respond to pings and 'database::#!variable!uuid!#::ping' is not set to '0' in '#!data!path::configs::anvil.conf!#', skipping it.]]> - [ Warning ] - The database: [#!variable!name!#] on host: [#!variable!host!#] with UUID: [#!variable!uuid!#] can not be used, skipping it. + [ Note ] - The database: [#!variable!name!#] on host: [#!variable!host!#] with UUID: [#!variable!uuid!#] can not be used, skipping it. The database connection error was: ---------- @@ -1992,6 +2015,13 @@ The file: [#!variable!file!#] needs to be updated. The difference is: [ Note ] - The network interface: [#!variable!name!#] with 'network_interface_uuid': [#!variable!uuid!#] is a duplicate, removing it from the database(s). [ Note ] - Managing /etc/hosts has been disabled. [ Note ] - The Anvil!: [#!variable!anvil_name!#]'s storage group: [#!variable!storage_group!#] didn't have an entry for the host: [#!variable!host_name!#]. The volume group: [#!variable!vg_internal_uuid!#] is a close fit and not in another storage group, so adding it to this storage group now. + [ Note ] - We're a Striker and we did not connect to a peer's database. Will check now if we can load a recent backup, then start postgres locally (with or without a load). + Evaluating the dump file: [#!variable!full_path!#]. + The database host UUID: [#!variable!host_uuid!#] is not configured here, ignoring: [#!variable!full_path!#]. + We created the database dump file: [#!variable!full_path!#], ignoring it. + The database was dumped to: [#!variable!file!#] in: [#!variable!took!#] second(s). 
The size of the dump file is: [#!variable!size!#] (#!variable!size_bytes!# bytes). + The database was loaded successfully from the file: [#!variable!file!#] in: [#!variable!took!#] second(s)! + No databases were available, so we will become primary after loading: [#!variable!file!#], which is: [#!variable!size!#] (#!variable!size_bytes!# bytes). Please be patient, this could take a moment. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 7aec9e63..23d9cff7 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/perl # # This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster # nodes and DR hosts. @@ -171,19 +171,21 @@ my $delay = set_delay($anvil); # Once a minute, we'll check the md5sums and see if we should restart. # Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
-$anvil->data->{timing}{minute_checks} = 60; -$anvil->data->{timing}{daily_checks} = 86400; -$anvil->data->{timing}{repo_update_interval} = 86400; -$anvil->data->{timing}{next_minute_check} = $now_time - 1; -$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1; +$anvil->data->{timing}{minute_checks} = 60; +$anvil->data->{timing}{daily_checks} = 86400; +$anvil->data->{timing}{repo_update_interval} = 86400; +$anvil->data->{timing}{next_minute_check} = $now_time - 1; +$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1; +$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks}, - "s2:timing::daily_checks" => $anvil->data->{timing}{daily_checks}, - "s3:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval}, - "s4:now_time" => $now_time, - "s5:delay" => $delay, - "s6:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, - "s7:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, + "s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks}, + "s2:timing::daily_checks" => $anvil->data->{timing}{daily_checks}, + "s3:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval}, + "s4:now_time" => $now_time, + "s5:delay" => $delay, + "s6:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, + "s7:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, + "s8:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, }}); # Disconnect.
We'll reconnect inside the loop @@ -418,10 +420,11 @@ sub handle_periodic_tasks my $now_time = time; my $type = $anvil->Get->host_type(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "s1:now_time" => $now_time, - "s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, - "s3:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, - "s4:type" => $type, + "s1:now_time" => $now_time, + "s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, + "s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, + "s4:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, + "s5:type" => $type, }}); # Time to run once per minute tasks. @@ -503,6 +506,29 @@ sub handle_periodic_tasks $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); } + # Now check to see if it's time to run less frequent tasks. + if ($now_time >= $anvil->data->{timing}{next_ten_minute_check}) + { + if ($type eq "striker") + { + # If we're the active database, dump our database out and rsync it to our peers. + my $peers = keys %{$anvil->data->{database}}; + my $host_uuid = $anvil->Get->host_uuid; + my $connections = $anvil->data->{sys}{database}{connections}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peers => $peers, + host_uuid => $host_uuid, + connections => $connections, + }}); + if ($anvil->data->{sys}{database}{local_uuid} eq $host_uuid) + { + # TODO: Dump the local database and rsync the dump file to our peers. + } + } + # Schedule the next run; 600 seconds is assumed from the 'ten minute' name - confirm. + $anvil->data->{timing}{next_ten_minute_check} = $now_time + 600; + } + # Now check to see if it's time to run daily tasks. if ($now_time >= $anvil->data->{timing}{next_daily_check}) {