diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index f358106e..16d4ad8c 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -874,10 +874,14 @@ sub _set_defaults locking_reap_age => 300, log_transactions => 0, maximum_batch_size => 25000, + # NOTE: Do NOT change this unless you are certain all machines that use this host + # have been likewise updated! name => "anvil", read_uuid => "", test_table => "hosts", timestamp => "", + # NOTE: Do NOT change this unless you are certain all machines that use this host + # have been likewise updated! user => "admin", use_handle => "", }, @@ -1077,6 +1081,7 @@ sub _set_paths html => "/var/www/html", ifcfg => "/etc/sysconfig/network-scripts", journald => "/var/log/journal", + pgsql => "/var/lib/pgsql/", resource_status => "/sys/kernel/debug/drbd/resources", scan_agents => "/usr/sbin/scancore-agents", shared => { @@ -1109,6 +1114,7 @@ sub _set_paths 'anvil-get-server-screenshot' => "/usr/sbin/anvil-get-server-screenshot", 'anvil-join-anvil' => "/usr/sbin/anvil-join-anvil", 'anvil-maintenance-mode' => "/usr/sbin/anvil-maintenance-mode", + 'anvil-manage-dr' => "/usr/sbin/anvil-manage-dr", 'anvil-manage-firewall' => "/usr/sbin/anvil-manage-firewall", 'anvil-manage-keys' => "/usr/sbin/anvil-manage-keys", 'anvil-manage-power' => "/usr/sbin/anvil-manage-power", @@ -1146,6 +1152,7 @@ sub _set_paths dnf => "/usr/bin/dnf", drbdadm => "/usr/sbin/drbdadm", drbdsetup => "/usr/sbin/drbdsetup", + dropdb => "/usr/bin/dropdb", echo => "/usr/bin/echo", ethtool => "/usr/sbin/ethtool", expect => "/usr/bin/expect", @@ -1167,6 +1174,10 @@ sub _set_paths ip => "/usr/sbin/ip", 'ipmi-oem' => "/usr/sbin/ipmi-oem", ipmitool => "/usr/bin/ipmitool", + ### NOTE: When System->manage_firewall() is done, search for and replace all + ### instances where iptables is called and replace with firewall-cmd + ### calls + iptables => "/usr/sbin/iptables", 'iptables-save' => "/usr/sbin/iptables-save", journalctl => "/usr/bin/journalctl", logger => "/usr/bin/logger", @@ 
-1196,6 +1207,7 @@ sub _set_paths pcs => "/usr/sbin/pcs", perccli64 => "/opt/MegaRAID/perccli/perccli64", ping => "/usr/bin/ping", + pg_dump => "/usr/bin/pg_dump", pgrep => "/usr/bin/pgrep", ps => "/usr/bin/ps", psql => "/usr/bin/psql", diff --git a/Anvil/Tools/DRBD.pm b/Anvil/Tools/DRBD.pm index 12cd435d..c34a687d 100644 --- a/Anvil/Tools/DRBD.pm +++ b/Anvil/Tools/DRBD.pm @@ -20,6 +20,7 @@ my $THIS_FILE = "DRBD.pm"; # delete_resource # gather_data # get_devices +# get_next_resource # get_status # manage_resource # reload_defaults @@ -617,6 +618,12 @@ sub gather_data $anvil->data->{new}{scan_drbd}{scan_drbd_timeout} = 6; # Default is '60', 6 seconds $anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed} = 0; + my $local_host_name = $anvil->Get->host_name; + my $local_short_host_name = $anvil->Get->short_host_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + local_host_name => $local_host_name, + local_short_host_name => $local_short_host_name, + }}); foreach my $name ($dom->findnodes('/config/common/section')) { my $section = $name->{name}; @@ -705,7 +712,7 @@ sub gather_data }}); # Record the local data only. 
- if (($this_host_name eq $anvil->Get->host_name) or ($this_host_name eq $anvil->Get->short_host_name)) + if (($this_host_name eq $local_host_name) or ($this_host_name eq $local_short_host_name)) { $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path} = $volume_vnr->findvalue('./device'); $anvil->data->{new}{resource}{$resource}{volume}{$volume}{backing_disk} = $volume_vnr->findvalue('./disk'); @@ -722,32 +729,135 @@ sub gather_data foreach my $connection ($name->findnodes('./connection')) { - my $peer = ""; + my $host1_name = ""; + my $host1_ip_address = ""; + my $host1_tcp_port = ""; + my $host2_name = ""; + my $host2_ip_address = ""; + my $host2_tcp_port = ""; + my $peer = ""; foreach my $host ($connection->findnodes('./host')) { my $this_host_name = $host->{name}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { this_host_name => $this_host_name }}); + if (not $host1_name) + { + $host1_name = $this_host_name; + $host1_ip_address = $host->findvalue('./address'); + $host1_tcp_port = $host->findvalue('./address/@port'); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + host1_name => $host1_name, + host1_ip_address => $host1_ip_address, + host1_tcp_port => $host1_tcp_port, + }}); + } + else + { + $host2_name = $this_host_name; + $host2_ip_address = $host->findvalue('./address'); + $host2_tcp_port = $host->findvalue('./address/@port'); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + host2_name => $host2_name, + host2_ip_address => $host2_ip_address, + host2_tcp_port => $host2_tcp_port, + }}); + } - next if (($this_host_name eq $anvil->Get->host_name) or ($this_host_name eq $anvil->Get->short_host_name)); - - $peer = $this_host_name; - $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address} = $host->findvalue('./address'); - $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port} = 
$host->findvalue('./address/@port');; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "s1:new::resource::${resource}::peer::${peer}::peer_ip_address" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address}, - "s2:new::resource::${resource}::peer::${peer}::tcp_port" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port}, - }}); +# $peer = $this_host_name; +# $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address} = $host->findvalue('./address'); +# $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port} = $host->findvalue('./address/@port'); +# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { +# "s1:new::resource::${resource}::peer::${peer}::peer_ip_address" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address}, +# "s2:new::resource::${resource}::peer::${peer}::tcp_port" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port}." 
(".$host->findvalue('./address/@port').")", +# }}); - if (not exists $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol}) +# if (not exists $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol}) +# { +# $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol} = "unknown"; +# $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing} = "unknown"; +# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { +# "s1:new::resource::${resource}::peer::${peer}::protocol" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol}, +# "s2:new::resource::${resource}::peer::${peer}::fencing" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing}, +# }}); +# } +# +# foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{volume}}) +# { +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{connection_state} = "disconnected"; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_disk_state} = "down"; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_disk_state} = "unknown"; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_role} = "down"; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_role} = "unknown"; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{out_of_sync_size} = -1; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{replication_speed} = 0; +# $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync} = 0; +# } + } + + if (($host1_name eq $local_short_host_name) or + ($host1_name eq $local_host_name) or + ($host2_name eq $local_short_host_name) or + ($host2_name eq $local_host_name)) + { + # This is one of our connections. 
+ my $peer = ""; + if (($host1_name eq $local_short_host_name) or ($host1_name eq $local_host_name)) + { + # Our peer is host2 + $peer = $host2_name; + $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address} = $host2_ip_address; + $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port} = $host2_tcp_port; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "s1:new::resource::${resource}::peer::${peer}::peer_ip_address" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address}, + "s2:new::resource::${resource}::peer::${peer}::tcp_port" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port}, + }}); + } + else { - $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol} = "unknown"; - $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing} = "unknown"; + # Our peer is host1 + $peer = $host1_name; + $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address} = $host1_ip_address; + $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port} = $host1_tcp_port; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "s1:new::resource::${resource}::peer::${peer}::protocol" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol}, - "s2:new::resource::${resource}::peer::${peer}::fencing" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing}, + "s1:new::resource::${resource}::peer::${peer}::peer_ip_address" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address}, + "s2:new::resource::${resource}::peer::${peer}::tcp_port" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port}, }}); } + foreach my $name ($connection->findnodes('./section')) + { + my $section = $name->{name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { section => $section }}); + + foreach my $option_name ($name->findnodes('./option')) + { + my $variable = 
$option_name->{name}; + my $value = $option_name->{value}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:variable' => $variable, + 's2:value' => $value, + }}); + + if ($section eq "net") + { + if ($variable eq "protocol") + { + $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol} = $value; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "new::resource::${resource}::peer::${peer}::protocol" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol}, + }}); + } + if ($variable eq "fencing") + { + $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing} = $value; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "new::resource::${resource}::peer::${peer}::fencing" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing}, + }}); + } + } + } + } + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{volume}}) { $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{connection_state} = "disconnected"; @@ -760,40 +870,6 @@ sub gather_data $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync} = 0; } } - - foreach my $name ($connection->findnodes('./section')) - { - my $section = $name->{name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { section => $section }}); - - foreach my $option_name ($name->findnodes('./option')) - { - my $variable = $option_name->{name}; - my $value = $option_name->{value}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - 's1:variable' => $variable, - 's2:value' => $value, - }}); - - if ($section eq "net") - { - if ($variable eq "protocol") - { - $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol} = $value; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 
$debug, list => { - "new::resource::${resource}::peer::${peer}::protocol" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol}, - }}); - } - if ($variable eq "fencing") - { - $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing} = $value; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "new::resource::${resource}::peer::${peer}::fencing" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing}, - }}); - } - } - } - } } } } @@ -1249,6 +1325,10 @@ Parameters; This is the Anvil! in which we're looking for the next free resources. It's required, but generally it doesn't need to be specified as we can find it via C<< Cluster->get_anvil_uuid() >>. +=head3 dr_tcp_ports (optional, default '0') + +If set, the 'free_port' returned will be a comma-separated pair of TCP ports. This is meant to help find two TCP ports needed to connect a resource from both nodes to a DR host. + =head3 resource_name (optional) If this is set, and the resource is found to already exist, the first DRBD minor number and first used TCP port are returned. Alternatively, if C<< force_unique >> is set to C<< 1 >>, and the resource is found to exist, empty strings are returned. @@ -1267,10 +1347,12 @@ sub get_next_resource $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "DRBD->get_next_resource()" }}); my $anvil_uuid = defined $parameter->{anvil_uuid} ? $parameter->{anvil_uuid} : ""; + my $dr_tcp_ports = defined $parameter->{dr_tcp_ports} ? $parameter->{dr_tcp_ports} : ""; my $resource_name = defined $parameter->{resource_name} ? $parameter->{resource_name} : ""; my $force_unique = defined $parameter->{force_unique} ? 
$parameter->{force_unique} : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { anvil_uuid => $anvil_uuid, + dr_tcp_ports => $dr_tcp_ports, resource_name => $resource_name, force_unique => $force_unique, }}); @@ -1387,7 +1469,7 @@ ORDER BY 's6:scan_drbd_volume_device_minor' => $scan_drbd_volume_device_minor, 's7:scan_drbd_peer_host_name' => $scan_drbd_peer_host_name, 's8:scan_drbd_peer_ip_address' => $scan_drbd_peer_ip_address, - 's9:scan_drbd_peer_protocol' => $scan_drbd_peer_protocol, + 's9:scan_drbd_peer_protocol' => $scan_drbd_peer_protocol, 's10:scan_drbd_peer_fencing' => $scan_drbd_peer_fencing, 's11:scan_drbd_peer_tcp_port' => $scan_drbd_peer_tcp_port, }}); @@ -1434,22 +1516,55 @@ ORDER BY } } - $looking = 1; - $free_port = 7788; + $looking = 1; + $free_port = 7788; + my $tcp_pair = ""; while($looking) { - if (exists $anvil->data->{drbd}{used_resources}{tcp_port}{$free_port}) + if ((exists $anvil->data->{drbd}{used_resources}{tcp_port}{$free_port}) && + ($anvil->data->{drbd}{used_resources}{tcp_port}{$free_port}{used})) { $free_port++; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { free_port => $free_port }}); } else { - $looking = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { looking => $looking }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { free_port => $free_port }}); + if ($dr_tcp_ports) + { + if (not $tcp_pair) + { + $tcp_pair = $free_port; + $free_port++; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + tcp_pair => $tcp_pair, + free_port => $free_port, + }}); + } + elsif ($tcp_pair !~ /,/) + { + $tcp_pair .= ",".$free_port; + $looking = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + tcp_pair => $tcp_pair, + looking => $looking, + }}); + } + } + else + { + $looking = 0; + 
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { looking => $looking }}); + } } } + if ($dr_tcp_ports) + { + $free_port = $tcp_pair; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { free_port => $free_port }}); + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { free_minor => $free_minor, free_port => $free_port, diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index ed0b6b23..5ed17f2f 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -17,6 +17,7 @@ my $THIS_FILE = "Database.pm"; ### Methods; # archive_database +# backup_database # check_file_locations # check_lock_age # check_for_schema @@ -78,6 +79,7 @@ my $THIS_FILE = "Database.pm"; # insert_or_update_upses # insert_or_update_users # insert_or_update_variables +# load_database # lock_file # locking # manage_anvil_conf @@ -300,6 +302,81 @@ sub archive_database } +=head2 backup_database + +This backs up the database to the C<< path::directories::pgsql >> directory, in a file named C<< [database name]_db_dump.[host UUID].sql >>. + +If the backup is successful, the full path to the backup file is returned. If there is a problem, C<< !!error!! >> is returned. + +B<< Note >>: This method must be called by the root user. + +B<< Note >>: If C<< sys::database::name >> has been changed, the dump file name will match. + +This method takes no parameters. + +=cut +sub backup_database +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ?
$parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->backup_database()" }}); + + # Only the root user can do this + if (($< != 0) && ($> != 0)) + { + # Not root + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0352"}); + return('!!error!!'); + } + + my $start_time = time; + my $dump_file = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$anvil->Get->host_uuid().".sql"; + $dump_file =~ s/\/\//\//g; + my $dump_call = $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." ".$anvil->data->{sys}{database}{name}." > ".$dump_file."\""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + dump_file => $dump_file, + dump_call => $dump_call, + }}); + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $dump_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + output => $output, + return_code => $return_code, + }}); + + if ($return_code) + { + # Dump failed. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0351", variables => { + shell_call => $dump_call, + return_code => $return_code, + output => $output, + }}); + + # Clear the out file. 
+ if (-e $dump_file) + { + unlink $dump_file; + } + return('!!error!!'); + } + + # Record the stats + $anvil->Storage->get_file_stats({debug => $debug, file_path => $dump_file}); + my $dump_time = time - $start_time; + my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}}); + my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0654", variables => { + file => $dump_file, + took => $dump_time, + size => $size, + size_bytes => $size_bytes, + }}); + + return($dump_file); +} + =head2 check_file_locations This method checks to see that there is a corresponding entry in C<< file_locations >> for all Anvil! systems and files in the database. Any that are found to be missing will be set to C<< file_location_active >> -> c<< false >>. @@ -722,35 +799,34 @@ sub configure_pgsql return(1); } - # First, is it running? - my $running = $anvil->System->check_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); + # First, is it running and is it initialized? + my $initialized = 0; + my $running = $anvil->System->check_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { running => $running }}); - - if (not $running) + if (not -e $anvil->data->{path}{configs}{'pg_hba.conf'}) { - # Do we need to initialize the databae? + # Initialize. Record that we did so, so that we know to start the daemon. + my ($output, $return_code) = $anvil->System->call({debug => 1, shell_call => $anvil->data->{path}{exe}{'postgresql-setup'}." initdb", source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { output => $output, return_code => $return_code }}); + + # Did it succeed? 
if (not -e $anvil->data->{path}{configs}{'pg_hba.conf'}) { - # Initialize. - my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{'postgresql-setup'}." initdb", source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { output => $output, return_code => $return_code }}); + # Failed... + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0050"}); + return("!!error!!"); + } + else + { + # Initialized! + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0055"}); - # Did it succeed? - if (not -e $anvil->data->{path}{configs}{'pg_hba.conf'}) - { - # Failed... - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0050"}); - return("!!error!!"); - } - else - { - # Initialized! - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0055"}); - - # Enable it on boot. - my $return_code = $anvil->System->enable_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); - } + $initialized = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { initialized => $initialized }}); + + ### NOTE: We no longer enable postgres on boot. When the first call is made to + ### Database->connect on a striker, and no databases are available, it will + ### start up the local daemon then. } } @@ -835,7 +911,7 @@ sub configure_pgsql { # Back up the existing one, if needed. 
my $pg_hba_backup = $anvil->data->{path}{directories}{backups}."/pgsql/pg_hba.conf"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pg_hba_backup => $pg_hba_backup }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { pg_hba_backup => $pg_hba_backup }}); if (not -e $pg_hba_backup) { $anvil->Storage->copy_file({ @@ -859,26 +935,30 @@ sub configure_pgsql # Start or restart the daemon? if (not $running) { - # Start the daemon. - my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); - if ($return_code eq "0") - { - # Started the daemon. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); - } - else + # Did we initialize? + if ($initialized) { - # Failed to start - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0094"}); - return("!!error!!"); + # Start the daemon. + my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { return_code => $return_code }}); + if ($return_code eq "0") + { + # Started the daemon. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); + } + else + { + # Failed to start + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0094"}); + return("!!error!!"); + } } } elsif (($update_postgresql_file) or ($update_pg_hba_file)) { # Reload my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { return_code => $return_code }}); if ($return_code eq "0") { # Reloaded the daemon. @@ -891,150 +971,154 @@ sub configure_pgsql } } - # Create the .pgpass file, if needed. - my $created_pgpass = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { - 'path::secure::postgres_pgpass' => $anvil->data->{path}{secure}{postgres_pgpass}, - "database::${uuid}::password" => $anvil->Log->is_secure($anvil->data->{database}{$uuid}{password}), - }}); - if ((not -e $anvil->data->{path}{secure}{postgres_pgpass}) && ($anvil->data->{database}{$uuid}{password})) + # Do user and DB checks only if we've made a change above.
+ if (($initialized) or ($update_postgresql_file) or ($update_pg_hba_file)) { - my $body = "*:*:*:postgres:".$anvil->data->{database}{$uuid}{password}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { body => $body }}); - $anvil->Storage->write_file({ - file => $anvil->data->{path}{secure}{postgres_pgpass}, - body => $body, - user => "postgres", - group => "postgres", - mode => "0600", - overwrite => 1, - secure => 1, - }); - if (-e $anvil->data->{path}{secure}{postgres_pgpass}) - { - $created_pgpass = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { created_pgpass => $created_pgpass }}); + # Create the .pgpass file, if needed. + my $created_pgpass = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { + 'path::secure::postgres_pgpass' => $anvil->data->{path}{secure}{postgres_pgpass}, + "database::${uuid}::password" => $anvil->Log->is_secure($anvil->data->{database}{$uuid}{password}), + }}); + if ((not -e $anvil->data->{path}{secure}{postgres_pgpass}) && ($anvil->data->{database}{$uuid}{password})) + { + my $body = "*:*:*:postgres:".$anvil->data->{database}{$uuid}{password}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { body => $body }}); + $anvil->Storage->write_file({ + file => $anvil->data->{path}{secure}{postgres_pgpass}, + body => $body, + user => "postgres", + group => "postgres", + mode => "0600", + overwrite => 1, + secure => 1, + }); + if (-e $anvil->data->{path}{secure}{postgres_pgpass}) + { + $created_pgpass = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { created_pgpass => $created_pgpass }}); + } } - } - - # Does the database user exist? - my $create_user = 1; - my $database_user = $anvil->data->{database}{$uuid}{user} ? 
$anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_user => $database_user }}); - if (not $database_user) - { - # No database user defined - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0099", variables => { uuid => $uuid }}); - return("!!error!!"); - } - my ($user_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT usename, usesysid FROM pg_catalog.pg_user;'\"", source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { user_list => $user_list, return_code => $return_code }}); - foreach my $line (split/\n/, $user_list) - { - if ($line =~ /^ $database_user\s+\|\s+(\d+)/) - { - # User exists already - my $uuid = $1; - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0060", variables => { user => $database_user, uuid => $uuid }}); - $create_user = 0; - last; + + # Does the database user exist? + my $create_user = 1; + my $database_user = $anvil->data->{database}{$uuid}{user} ? 
$anvil->data->{database}{$uuid}{user} : $anvil->data->{sys}{database}{user}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_user => $database_user }}); + if (not $database_user) + { + # No database user defined + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0099", variables => { uuid => $uuid }}); + return("!!error!!"); } - } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_user => $create_user }}); - if ($create_user) - { - # Create the user - my ($create_output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createuser}." --no-superuser --createdb --no-createrole $database_user\"", source => $THIS_FILE, line => __LINE__}); - (my $user_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT usename, usesysid FROM pg_catalog.pg_user;'\"", source => $THIS_FILE, line => __LINE__}); - my $user_exists = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_output => $create_output, user_list => $user_list }}); + my ($user_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT usename, usesysid FROM pg_catalog.pg_user;'\"", source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { user_list => $user_list, return_code => $return_code }}); foreach my $line (split/\n/, $user_list) { if ($line =~ /^ $database_user\s+\|\s+(\d+)/) { - # Success! 
+ # User exists already my $uuid = $1; - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0095", variables => { user => $database_user, uuid => $uuid }}); - $user_exists = 1; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0060", variables => { user => $database_user, uuid => $uuid }}); + $create_user = 0; last; } } - if (not $user_exists) + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_user => $create_user }}); + if ($create_user) { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0096", variables => { user => $database_user }}); - return("!!error!!"); - } - - # Update/set the passwords. - if ($anvil->data->{database}{$uuid}{password}) - { - foreach my $user ("postgres", $database_user) + # Create the user + my ($create_output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createuser}." --no-superuser --createdb --no-createrole $database_user\"", source => $THIS_FILE, line => __LINE__}); + (my $user_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT usename, usesysid FROM pg_catalog.pg_user;'\"", source => $THIS_FILE, line => __LINE__}); + my $user_exists = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_output => $create_output, user_list => $user_list }}); + foreach my $line (split/\n/, $user_list) { - my ($update_output, $return_code) = $anvil->System->call({secure => 1, shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." 
template1 -c \\\"ALTER ROLE $user WITH PASSWORD '".$anvil->data->{database}{$uuid}{password}."';\\\"\"", source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { update_output => $update_output, return_code => $return_code }}); - foreach my $line (split/\n/, $user_list) + if ($line =~ /^ $database_user\s+\|\s+(\d+)/) { - if ($line =~ /ALTER ROLE/) + # Success! + my $uuid = $1; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0095", variables => { user => $database_user, uuid => $uuid }}); + $user_exists = 1; + last; + } + } + if (not $user_exists) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0096", variables => { user => $database_user }}); + return("!!error!!"); + } + + # Update/set the passwords. + if ($anvil->data->{database}{$uuid}{password}) + { + foreach my $user ("postgres", $database_user) + { + my ($update_output, $return_code) = $anvil->System->call({secure => 1, shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c \\\"ALTER ROLE $user WITH PASSWORD '".$anvil->data->{database}{$uuid}{password}."';\\\"\"", source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 1, list => { update_output => $update_output, return_code => $return_code }}); + foreach my $line (split/\n/, $user_list) { - # Password set - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0100", variables => { user => $user }}); + if ($line =~ /ALTER ROLE/) + { + # Password set + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0100", variables => { user => $user }}); + } } } } } - } - - # Create the database, if needed. 
- my $create_database = 1; - my $database_name = defined $anvil->data->{database}{$uuid}{name} ? $anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_name => $database_name }}); - - (my $database_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT datname FROM pg_catalog.pg_database;'\"", source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_list => $database_list, return_code => $return_code }}); - foreach my $line (split/\n/, $database_list) - { - if ($line =~ /^ $database_name$/) - { - # Database already exists. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0105", variables => { database => $database_name }}); - $create_database = 0; - last; - } - } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_database => $create_database }}); - if ($create_database) - { - my ($create_output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createdb}." --owner $database_user $database_name\"", source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_output => $create_output, return_code => $return_code }}); - my $database_exists = 0; + # Create the database, if needed. + my $create_database = 1; + my $database_name = defined $anvil->data->{database}{$uuid}{name} ? 
$anvil->data->{database}{$uuid}{name} : $anvil->data->{sys}{database}{name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_name => $database_name }}); + (my $database_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT datname FROM pg_catalog.pg_database;'\"", source => $THIS_FILE, line => __LINE__}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_list => $database_list, return_code => $return_code }}); foreach my $line (split/\n/, $database_list) { if ($line =~ /^ $database_name$/) { - # Database created - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0110", variables => { database => $database_name }}); - $database_exists = 1; + # Database already exists. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0105", variables => { database => $database_name }}); + $create_database = 0; last; } } - if (not $database_exists) + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_database => $create_database }}); + if ($create_database) { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0109", variables => { database => $database_name }}); - return("!!error!!"); + my ($create_output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{createdb}." 
--owner $database_user $database_name\"", source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_output => $create_output, return_code => $return_code }}); + + my $database_exists = 0; + (my $database_list, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{su}." - postgres -c \"".$anvil->data->{path}{exe}{psql}." template1 -c 'SELECT datname FROM pg_catalog.pg_database;'\"", source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { database_list => $database_list, return_code => $return_code }}); + foreach my $line (split/\n/, $database_list) + { + if ($line =~ /^ $database_name$/) + { + # Database created + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0110", variables => { database => $database_name }}); + $database_exists = 1; + last; + } + } + if (not $database_exists) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0109", variables => { database => $database_name }}); + return("!!error!!"); + } } - } - - # Remove the temporary password file. - if (($created_pgpass) && (-e $anvil->data->{path}{secure}{postgres_pgpass})) - { - unlink $anvil->data->{path}{secure}{postgres_pgpass}; - if (-e $anvil->data->{path}{secure}{postgres_pgpass}) + + # Remove the temporary password file. + if (($created_pgpass) && (-e $anvil->data->{path}{secure}{postgres_pgpass})) { - # Failed to unlink the file. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0107"}); + unlink $anvil->data->{path}{secure}{postgres_pgpass}; + if (-e $anvil->data->{path}{secure}{postgres_pgpass}) + { + # Failed to unlink the file. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0107"}); + } } } @@ -1104,6 +1188,10 @@ If set, the connection will be made only to the database server matching the UUI If set to C<< 1 >>, no attempt to ping a target before connection will happen, even if C<< database::::ping = 1 >> is set. +=head3 retry (optional, default '0') + +If this host is a Striker and no databases were available, this method tries to bring up the local database and then calls itself once more with this parameter set. When this is set, the method will not call itself again, which prevents endless recursion. + =head3 sensitive (optional, default '0') If set to C<< 1 >>, the caller is considered time sensitive and most checks are skipped. This is used when a call must respond as quickly as possible. @@ -1164,8 +1252,9 @@ sub connect my $check_if_configured = defined $parameter->{check_if_configured} ? $parameter->{check_if_configured} : 0; my $db_uuid = defined $parameter->{db_uuid} ? $parameter->{db_uuid} : ""; - my $no_ping = defined $parameter->{no_ping} ? $parameter->{no_ping} : 0; my $check_for_resync = defined $parameter->{check_for_resync} ? $parameter->{check_for_resync} : 0; + my $no_ping = defined $parameter->{no_ping} ? $parameter->{no_ping} : 0; + my $retry = defined $parameter->{retry} ? $parameter->{retry} : 0; my $sensitive = defined $parameter->{sensitive} ? $parameter->{sensitive} : 0; my $source = defined $parameter->{source} ? $parameter->{source} : "core"; my $sql_file = defined $parameter->{sql_file} ?
$parameter->{sql_file} : $anvil->data->{path}{sql}{'anvil.sql'}; @@ -1174,8 +1263,9 @@ sub connect $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_if_configured => $check_if_configured, db_uuid => $db_uuid, - no_ping => $no_ping, check_for_resync => $check_for_resync, + no_ping => $no_ping, + retry => $retry, sensitive => $sensitive, source => $source, sql_file => $sql_file, @@ -1228,6 +1318,23 @@ sub connect $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_for_resync => $check_for_resync }}); } + # If we're a Striker, see if we're configured. + my $local_host_type = $anvil->Get->host_type(); + my $local_host_uuid = $anvil->Get->host_uuid(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + local_host_type => $local_host_type, + local_host_uuid => $local_host_uuid, + check_if_configured => $check_if_configured, + real_uid => $<, + effective_uid => $>, + }}); + # If requested, and if running with root access, set it up (or update it) if needed. + # This method just returns if nothing is needed. + if (($local_host_type eq "striker") && ($check_if_configured) && ($< == 0) && ($> == 0)) + { + $anvil->Database->configure_pgsql({debug => 2, uuid => $local_host_uuid}); + } + # Now setup or however-many connections my $seen_connections = []; my $failed_connections = []; @@ -1369,13 +1476,14 @@ sub connect }}); if (not $test) { - # Something went wrong... - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "log_0064", variables => { + # Either the Striker hosting this is down, or it's not primary and stopped its + # database. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, priority => "alert", key => "log_0064", variables => { uuid => $uuid, host => $host, name => $name, }}); - + push @{$failed_connections}, $uuid; my $message_key = "log_0065"; my $variables = { dbi_error => $DBI::errstr }; @@ -1422,7 +1530,7 @@ sub connect port => $port, }; } - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => $message_key, variables => $variables }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, priority => "alert", key => $message_key, variables => $variables }); next; } @@ -1549,24 +1657,12 @@ sub connect } # Before we try to connect, see if this is a local database and, if so, make sure it's setup. - my $is_local = $anvil->Network->is_local({debug => $debug, host => $host}); + my $is_local = $anvil->Network->is_local({debug => 2, host => $host}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_local => $is_local }}); if ($is_local) { $anvil->data->{sys}{database}{read_uuid} = $uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }}); - - # If requested, and if running with root access, set it up (or update it) if needed. - # This method just returns if nothing is needed. - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - check_if_configured => $check_if_configured, - real_uid => $<, - effective_uid => $>, - }}); - if (($check_if_configured) && ($< == 0) && ($> == 0)) - { - $anvil->Database->configure_pgsql({debug => $debug, uuid => $uuid}); - } } elsif (not $anvil->data->{sys}{database}{read_uuid}) { @@ -1602,14 +1698,177 @@ sub connect target_version => $remote_schema_version, }}); - # Delete the information about this database. 
We'll try again on nexy + # Delete the information about this database. We'll try again on next # ->connect(). delete $anvil->data->{database}{$uuid}; + $anvil->data->{sys}{database}{connections}--; next; } } } + # If we're a striker and no connections were found, start our database. + if (($local_host_type eq "striker") && (not $anvil->data->{sys}{database}{connections})) + { + # Tell the user we're going to try to load and start. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "log_0650"}); + + # Look for pgdumps. "Youngest" is the one with the highest mtime. + my $use_dump = ""; + my $backup_age = 0; + my $youngest_dump = 0; + my $directory = $anvil->data->{path}{directories}{pgsql}; + my $db_name = $anvil->data->{sys}{database}{name}; + my $dump_files = []; + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { directory => $directory }}); + local(*DIRECTORY); + opendir(DIRECTORY, $directory); + while(my $file = readdir(DIRECTORY)) + { + next if $file eq "."; + next if $file eq ".."; + my $db_dump_uuid = ""; + my $full_path = $directory."/".$file; + $full_path =~ s/\/\//\//g; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + file => $file, + full_path => $full_path, + }}); + if ($file =~ /\Q$db_name\E_db_dump\.(.*).sql/) + { + $db_dump_uuid = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { directory => $directory }}); + + # Is this one of our own dumps? + if ($db_dump_uuid eq $local_host_uuid) + { + # How recent is it? 
+ $anvil->Storage->get_file_stats({debug => $debug, file_path => $full_path}); + my $mtime = $anvil->data->{file_stat}{$full_path}{modified_time}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mtime => $mtime }}); + + if ($mtime > $backup_age) + { + $backup_age = $mtime; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_age => $backup_age }}); + } + + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0653", variables => { full_path => $full_path }}); + next; + } + + # Record this dump file for later purging. + push @{$dump_files}, $full_path; + + # Is this a database we're configured to use? + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0651", variables => { full_path => $full_path }}); + if ((not exists $anvil->data->{database}{$db_dump_uuid}) or (not $anvil->data->{database}{$db_dump_uuid}{host})) + { + # Not a database we're peered with anymore, ignore it. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0652", variables => { + full_path => $full_path, + host_uuid => $db_dump_uuid, + }}); + next; + } + + # Still here? This is a candidate for loading. What's the mtime on this file? + $anvil->Storage->get_file_stats({debug => $debug, file_path => $full_path}); + my $mtime = $anvil->data->{file_stat}{$full_path}{modified_time}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { directory => $directory }}); + + if ($mtime > $youngest_dump) + { + # This is the youngest, so far. + $youngest_dump = $mtime; + $use_dump = $full_path; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + youngest_dump => $youngest_dump, + full_path => $full_path, + }}); + } + } + else + { + # Not a dump file, ignore it. 
+ next; + } + } + closedir(DIRECTORY); + + # Did I find a dump to load that's newer than my most recent backup? + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { use_dump => $use_dump }}); + if ($use_dump) + { + # Is one of our dumps newer? If so, don't load. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + youngest_dump => $youngest_dump, + backup_age => $backup_age, + }}); + if ($backup_age > $youngest_dump) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0661"}); + } + else + { + # Yup! This will start the database, if needed. + my $file_size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$use_dump}{size}}); + my $file_size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$use_dump}{size}}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0656", variables => { + file => $use_dump, + size => $file_size, + size_bytes => $file_size_bytes, + }}); + + my $problem = $anvil->Database->load_database({ + debug => 2, + backup => 0, + load_file => $use_dump, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Failed, delete the file we tried to load. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "err", key => "error_0355", variables => { file => $use_dump }}); + unlink $use_dump; + } + else + { + # Success! Delete all backups we found from other hosts so we don't + # reload them in the future. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0657"}); + foreach my $full_path (@{$dump_files}) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0589", variables => { file => $full_path }}); + unlink $full_path; + } + } + } + } + + # Check if the daemon is running + my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }}); + if (not $running) + { + my $return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $return_code }}); + if ($return_code eq "0") + { + # Started the daemon. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); + } + } + + # Reconnect + if (not $retry) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0662"}); + $anvil->Database->connect({debug => $debug, retry => 1}); + } + } + my $total = tv_interval ($start_time, [gettimeofday]); #print "Total runtime: [".$total."]\n"; @@ -1635,17 +1894,11 @@ sub connect "database::${uuid}::password" => $anvil->Log->is_secure($anvil->data->{database}{$uuid}{password}), }}); - # Copy my alert hash before I delete the uuid. -# my $error_array = []; - # Delete this DB so that we don't try to use it later. This is a quiet alert because the # original connection error was likely logged. my $say_server = $anvil->data->{database}{$uuid}{host}.":".$anvil->data->{database}{$uuid}{port}." -> ".$database_name; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, priority => "alert", key => "log_0092", variables => { server => $say_server, uuid => $uuid }}); - # Delete it from the list of known databases for this run.
- delete $anvil->data->{database}{$uuid}; - # If I've not sent an alert about this DB loss before, send one now. # my $set = $anvil->Alert->check_alert_sent({ # debug => $debug, @@ -4564,7 +4817,7 @@ SELECT FROM scan_lvm_vgs WHERE - scan_lvm_vg_internal_uuid = ".$anvil->Database->quote($storage_group_member_vg_uuid)."; + scan_lvm_vg_internal_uuid = ".$anvil->Database->quote($storage_group_member_vg_uuid)." ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); @@ -4585,6 +4838,221 @@ WHERE }}); } } + + # Also load the Storage group extended data. + $anvil->Storage->get_storage_group_details({ + debug => $debug, + storage_group_uuid => $storage_group_uuid, + }); + } + + # If the Anvil! members have changed, we'll need to update the storage groups. This checks for that. + $anvil->Database->get_anvils({debug => $debug}); + foreach my $anvil_uuid (keys %{$anvil->data->{storage_groups}{anvil_uuid}}) + { + my $anvil_name = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_name}; + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + anvil_name => $anvil_name, + node1_host_uuid => $node1_host_uuid, + node2_host_uuid => $node2_host_uuid, + dr1_host_uuid => $dr1_host_uuid, + }}); + foreach my $storage_group_uuid (keys %{$anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}}) + { + my $group_name = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{group_name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + storage_group_uuid => $storage_group_uuid, + group_name => $group_name, + 
}}); + + my $size_to_match = 0; + my $node1_seen = 0; + my $node2_seen = 0; + my $dr1_seen = $dr1_host_uuid ? 0 : 1; # Only set to '0' if DR exists. + foreach my $this_host_uuid (keys %{$anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}}) + { + my $storage_group_member_uuid = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$this_host_uuid}{storage_group_member_uuid}; + my $internal_vg_uuid = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$this_host_uuid}{vg_internal_uuid}; + my $vg_size = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$this_host_uuid}{vg_size}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + this_host_uuid => $this_host_uuid, + storage_group_member_uuid => $storage_group_member_uuid, + internal_vg_uuid => $internal_vg_uuid, + vg_size => $anvil->Convert->add_commas({number => $vg_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $vg_size}).")", + }}); + + if ($vg_size > $size_to_match) + { + $size_to_match = $vg_size; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + size_to_match => $anvil->Convert->add_commas({number => $size_to_match})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $size_to_match}).")", + }}); + } + + if ($this_host_uuid eq $node1_host_uuid) + { + $node1_seen = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { node1_seen => $node1_seen }}); + } + elsif ($this_host_uuid eq $node2_host_uuid) + { + $node2_seen = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { node2_seen => $node2_seen }}); + } + elsif (($dr1_host_uuid) && ($this_host_uuid eq $dr1_host_uuid)) + { + $dr1_seen = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { dr1_seen => $dr1_seen }}); + } + else + { + # This host doesn't belong in this group anymore. Delete it. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0130", variables => { + storage_group_name => $group_name, + host_name => $anvil->Get->host_name_from_uuid({host_uuid => $this_host_uuid}), + anvil_name => $anvil_name, + }}); + + my $query = "DELETE FROM storage_group_members WHERE storage_group_member_uuid = ".$anvil->Database->quote($storage_group_member_uuid).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { query => $query }}); + $anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__}); + } + } + + if ((not $node1_seen) or + (not $node2_seen) or + (not $dr1_seen)) + { + my $hosts = [$node1_host_uuid, $node2_host_uuid]; + if ($dr1_host_uuid) + { + push @{$hosts}, $dr1_host_uuid; + } + + my $reload = 0; + foreach my $this_host_uuid (@{$hosts}) + { + # If we didn't see a host, look for a compatible VG to add. + my $minimum_size = $size_to_match - (2**30); + my $maximum_size = $size_to_match + (2**30); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + minimum_size => $anvil->Convert->add_commas({number => $minimum_size})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $minimum_size}).")", + maximum_size => $anvil->Convert->add_commas({number => $maximum_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $maximum_size}).")", + }}); + + my $smallest_difference = (2**30); + my $closest_internal_uuid = ""; + my $closest_scan_lvm_vg_uuid = ""; + my $quoted_minimum_size = $anvil->Database->quote($minimum_size); + $quoted_minimum_size =~ s/^'(.*)'$/$1/; + my $quoted_maximum_size = $anvil->Database->quote($maximum_size); + $quoted_maximum_size =~ s/^'(.*)'$/$1/; + my $query = " +SELECT + scan_lvm_vg_uuid, + scan_lvm_vg_internal_uuid, + scan_lvm_vg_size +FROM + scan_lvm_vgs +WHERE + scan_lvm_vg_size > ".$quoted_minimum_size." +AND + scan_lvm_vg_size < ".$quoted_maximum_size." +AND + scan_lvm_vg_host_uuid = ".$anvil->Database->quote($this_host_uuid)." +ORDER BY + scan_lvm_vg_size ASC +;"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + results => $results, + count => $count, + }}); + foreach my $row (@{$results}) + { + my $scan_lvm_vg_uuid = $row->[0]; + my $scan_lvm_vg_internal_uuid = $row->[1]; + my $scan_lvm_vg_size = $row->[2]; + my $difference = abs($scan_lvm_vg_size - $size_to_match); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + scan_lvm_vg_uuid => $scan_lvm_vg_uuid, + scan_lvm_vg_internal_uuid => $scan_lvm_vg_internal_uuid, + scan_lvm_vg_size => $anvil->Convert->add_commas({number => $scan_lvm_vg_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $scan_lvm_vg_size}).")", + difference => $anvil->Convert->add_commas({number => $difference})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $difference}).")", + }}); + + # Is this Internal UUID already in a storage group? + my $query = "SELECT COUNT(*) FROM storage_group_members WHERE storage_group_member_vg_uuid = ".$anvil->Database->quote($scan_lvm_vg_internal_uuid).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $count = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { count => $count }}); + if (not $count) + { + # This VG isn't in a storage group. Is this the closest in size yet? + if ($difference < $smallest_difference) + { + # Closest yet! + $smallest_difference = $difference; + $closest_scan_lvm_vg_uuid = $scan_lvm_vg_internal_uuid; + $closest_internal_uuid = $scan_lvm_vg_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + smallest_difference => $anvil->Convert->add_commas({number => $smallest_difference})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $smallest_difference}).")", + closest_internal_uuid => $closest_internal_uuid, + closest_scan_lvm_vg_uuid => $closest_scan_lvm_vg_uuid, + }}); + } + } + } + + # Did we find a matching VG? + if ($closest_scan_lvm_vg_uuid) + { + # Yup, add it! 
=head2 load_database

This takes a path to an uncompressed SQL database dump file, and loads it into the C<< anvil >> database. For the duration of this operation, remote access to the database is blocked by an C<< iptables >> C<< REJECT >> rule on TCP port 5432! Once the rule is in place, it is removed again before this method returns, whether the load succeeded or not.

If necessary, the database server will be started.

If the dump is successfully loaded, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned.

B<< Note >>: This method must be called by the root user.

B<< Note >>: This always and only works on the local database server's C<< anvil >> database.

B<< Note >>: The existing database is dropped and recreated before the file is loaded. Nothing destructive is done until the C<< load_file >> has been validated.

Parameters;

=head3 backup (optional, default '1')

This controls whether the data in the existing database is saved to a file prior to the passed-in database file being loaded. If the backup is requested and fails, the load is aborted.

=head3 load_file (required)

This is the full path to the SQL file to load into the database.

=cut
sub load_database
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->load_database()" }});
	
	my $backup    = defined $parameter->{backup}    ? $parameter->{backup}    : 1;
	my $load_file = defined $parameter->{load_file} ? $parameter->{load_file} : 0;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		backup    => $backup, 
		load_file => $load_file, 
	}});
	
	# Only the root user can do this
	if (($< != 0) && ($> != 0))
	{
		# Not root
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0350"});
		return('!!error!!');
	}
	
	# Were we given a file, and does it exist? We must bail out here in both cases; continuing would 
	# drop the existing database with nothing to load in its place.
	if (not $load_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->load_database()", parameter => "load_file" }});
		return('!!error!!');
	}
	elsif (not -e $load_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0103", variables => { file => $load_file }});
		return('!!error!!');
	}
	
	my $start_time = time;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { start_time => $start_time }});
	
	# Shell calls used below.
	# NOTE(review): 'load_file' is interpolated into the shell call unquoted. Paths with spaces or shell
	#               meta-characters will break (or alter) the call; callers should only pass trusted paths.
	my $su          = $anvil->data->{path}{exe}{su};
	my $db_name     = $anvil->data->{sys}{database}{name};
	my $block_call  = $anvil->data->{path}{exe}{iptables}." -I INPUT -p tcp --dport 5432 -j REJECT";
	my $open_call   = $anvil->data->{path}{exe}{iptables}." -D INPUT -p tcp --dport 5432 -j REJECT";
	my $drop_call   = $su." - postgres -c \"".$anvil->data->{path}{exe}{dropdb}." ".$db_name."\"";
	my $create_call = $su." - postgres -c \"".$anvil->data->{path}{exe}{createdb}." --owner ".$anvil->data->{sys}{database}{user}." ".$db_name."\"";
	my $load_call   = $su." - postgres -c \"".$anvil->data->{path}{exe}{psql}." ".$db_name." < ".$load_file."\"";
	
	# Runs a shell call, logs the output and return code, and hands both back to the caller.
	my $run_call = sub
	{
		my ($shell_call) = @_;
		my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
			output      => $output,
			return_code => $return_code, 
		}});
		return($output, $return_code);
	};
	
	# Removes the firewall block. This is called on every exit path once the block is in place.
	my $open_firewall = sub
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { open_call => $open_call }});
		$run_call->($open_call);
		return(0);
	};
	
	### TODO: Replace this with System->manage_firewall().
	# Throw up the firewall.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { block_call => $block_call }});
	$run_call->($block_call);
	
	# Start the database, if needed.
	my $running = $anvil->System->check_daemon({debug => $debug, daemon => $anvil->data->{sys}{daemon}{postgresql}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { running => $running }});
	if (not $running)
	{
		# Start it up.
		my $start_return_code = $anvil->System->start_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { return_code => $start_return_code }});
		if ($start_return_code eq "0")
		{
			# Started the daemon.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"});
		}
		else
		{
			# Failed to start
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0094"});
			$open_firewall->();
			return("!!error!!");
		}
	}
	
	# Backup, if needed.
	if ($backup)
	{
		# Backup the database. If this fails, we abort before touching the existing data.
		my $dump_file = $anvil->Database->backup_database({debug => $debug});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { dump_file => $dump_file }});
		if ($dump_file eq "!!error!!")
		{
			$open_firewall->();
			return("!!error!!");
		}
	}
	
	# Drop the existing database.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { drop_call => $drop_call }});
	my ($drop_output, $drop_return_code) = $run_call->($drop_call);
	if ($drop_return_code)
	{
		# This is a failure, but it could be that the database simply didn't exist (was already 
		# dropped). If that's the case, we'll keep going.
		my $proceed = 0;
		if ($drop_output =~ /database ".*?" does not exist/s)
		{
			# proceed.
			$proceed = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { proceed => $proceed }});
		}
		if (not $proceed)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0353", variables => { 
				shell_call  => $drop_call, 
				return_code => $drop_return_code, 
				output      => $drop_output, 
			}});
			$open_firewall->();
			return('!!error!!');
		}
	}
	
	# Recreate the DB.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { create_call => $create_call }});
	my ($create_output, $create_return_code) = $run_call->($create_call);
	if ($create_return_code)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0354", variables => { 
			shell_call  => $create_call, 
			return_code => $create_return_code, 
			output      => $create_output, 
		}});
		$open_firewall->();
		return('!!error!!');
	}
	
	# Finally, load the database.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { load_call => $load_call }});
	my ($load_output, $load_return_code) = $run_call->($load_call);
	if ($load_return_code)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0355", variables => { 
			shell_call  => $load_call, 
			return_code => $load_return_code, 
			output      => $load_output, 
		}});
		$open_firewall->();
		return('!!error!!');
	}
	
	# Open the firewall back up
	$open_firewall->();
	
	# Done!
	my $took_time = time - $start_time;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0655", variables => { 
		file => $load_file, 
		took => $took_time, 
	}});
	
	return(0);
}
=head2 get_vg_name

This method takes a Storage Group UUID and a host UUID, and returns the volume group name associated with those. 

If no matching volume group is found, an empty string is returned. If there is a problem (the C<< storage_group_uuid >> was not passed), C<< !!error!! >> is returned.

 my $vg_name = $anvil->Storage->get_vg_name({
 	host_uuid          => $dr_host_uuid,
 	storage_group_uuid => $storage_group_uuid,
 });

B<< Note >>: If more than one record matches, the name from the first record returned by the database is used.

Parameters;

=head3 host_uuid (optional, default Get->host_uuid)

This is the host's UUID that holds the VG name being searched for.

=head3 storage_group_uuid (required)

This is the Storage Group UUID being searched for.

=cut
sub get_vg_name
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Storage->get_vg_name()" }});
	
	my $host_uuid          = defined $parameter->{host_uuid}          ? $parameter->{host_uuid}          : "";
	my $storage_group_uuid = defined $parameter->{storage_group_uuid} ? $parameter->{storage_group_uuid} : "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		host_uuid          => $host_uuid, 
		storage_group_uuid => $storage_group_uuid, 
	}});
	
	# Default to the local host when no host was given.
	if (not $host_uuid)
	{
		$host_uuid = $anvil->Get->host_uuid();
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid }});
	}
	if (not $storage_group_uuid)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Storage->get_vg_name()", parameter => "storage_group_uuid" }});
		return('!!error!!');
	}
	
	# Join the storage group membership to the scanned VGs to translate the member's VG UUID to a name.
	my $query = "
SELECT 
    b.scan_lvm_vg_name 
FROM 
    storage_group_members a, 
    scan_lvm_vgs b 
WHERE 
    a.storage_group_member_vg_uuid = b.scan_lvm_vg_internal_uuid 
AND 
    a.storage_group_member_storage_group_uuid = ".$anvil->Database->quote($storage_group_uuid)." 
AND 
    a.storage_group_member_host_uuid = ".$anvil->Database->quote($host_uuid)." 
;";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		results => $results, 
		count   => $count, 
	}});
	if (not $count)
	{
		# Not found
		return("");
	}
	
	my $scan_lvm_vg_name = $results->[0]->[0];
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { scan_lvm_vg_name => $scan_lvm_vg_name }});
	
	return($scan_lvm_vg_name);
}
'not found'; + echo 'not found'; fi"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0166", variables => { shell_call => $shell_call, target => $target, remote_user => $remote_user }}); (my $output, $error, my $return_code) = $anvil->Remote->call({ diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index 0e08089a..8d6709fa 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -1568,6 +1568,11 @@ sub collect_ipmi_data $current_value = $1; $units = "degrees C"; } + if ($current_value =~ /^(.*?)\s+Volts/) + { + $current_value = $1; + $units = "V"; + } my $new_sensor_name = $sensor_name." (".$hex_address.")"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_sensor_name => $new_sensor_name, @@ -4439,7 +4444,7 @@ sub start_daemon This method stops a daemon. The return code from the stop request will be returned. -If the return code for the stop command wasn't read, C<< !!error!! >> is returned. +If the return code for the stop command is returned. Parameters; diff --git a/anvil.conf b/anvil.conf index f5bfda85..a292cebe 100644 --- a/anvil.conf +++ b/anvil.conf @@ -116,7 +116,7 @@ sys::database::log_transactions = 0 # lot of log traffic. If you want to silence these log alerts, you can set the value below to be higher than # your current active log level (default is '1', so set to '2' or '3' to silence). # NOTE: It's important to only use this temporarily. -sys::database::failed_connection_log_level = 1 +#sys::database::failed_connection_log_level = 2 # This controls what log facility to use by default. # NOTE: This will always be 'authpriv' when a log entry is marked as secure. @@ -204,3 +204,8 @@ sys::manage::firewall = 1 # Logging can be set on a per-agent basis with: #scancore::scan-network::log_level = 2 #scancore::scan-network::log_secure = 1 + +# By default, the Anvil! will manage the /etc/hosts file. 
If this is causing you issues, you can disable this +# behaviour by setting this to '0'. Be aware of course that you will need to manually update or add entries +# going forward. +#sys::hosts::manage = 0 diff --git a/notes b/notes index 9a7d1c79..604d9fc3 100644 --- a/notes +++ b/notes @@ -8,51 +8,14 @@ TODO: ============ # Dump -su - postgres -c "pg_dump anvil > /tmp/anvil.out" && mv /tmp/anvil.out /root/ -su - postgres -c "pg_dump --schema-only anvil > /tmp/anvil.out" && mv /tmp/anvil.out /root/ +su - postgres -c "pg_dump anvil > /var/lib/pgsql/anvil.out" +su - postgres -c "pg_dump --schema-only anvil > /var/lib/pgsql/anvil_schema.out" -cp /root/anvil.out /; su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil < /anvil.out" +su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil < /var/lib/pgsql/anvil.out" su postgres -c "psql anvil" ============ -# ScanCore post-scan logic; - -Sole node: -1. Evaluate critical shutdown only, if hosting VMs. -2. If not hosting VMs, load-shed if over-heat / power loss for more than 120 seconds - -Action options; -1. Do nothing -2. Pull servers -3. Shut down (once servers are gone) -4. Shut down (gracefully stop servers) - - -* Peer not available - - Thermal is critical, gracefully shut down. - - Power is strongest UPS below ten minutes and time on batteries is over 2 minutes, graceful shut down -* Peer available - - If one node is healthier than the other; - - If we're sicker, do nothing until we have no servers - - If we're healthier, after two minutes, pull - - If health is equal; - - Both nodes have servers; - - Decide who can be evacuated fastest, in case load shed needed. - - Both nodes on batteries or in warning temp for more than 2 minutes; - - If we're the designated survivor, pull servers. - - If we're the sacrifice, wait for the servers to be taken off of us, then shut down. 
- - Peer has servers, we don't - - If thermal warning or both/all UPSes on batter for two minutes+, shut down - - We have servers, peer doesn't. - - Keep running - - - -1.1 - Our peer may pull from us. -2. - Not Hosting Servers -2.1 - - Jenkins; @@ -696,7 +659,6 @@ drbdadm connect ================== # Server srv01-sql, example showing two disks in one VM. resource srv01-sql { - on mk-a02n01 { node-id 0; volume 0 { diff --git a/scancore-agents/scan-drbd/scan-drbd b/scancore-agents/scan-drbd/scan-drbd index 09b27b99..5e8b9e5a 100755 --- a/scancore-agents/scan-drbd/scan-drbd +++ b/scancore-agents/scan-drbd/scan-drbd @@ -79,7 +79,7 @@ if ($anvil->data->{switches}{purge}) $anvil->nice_exit({exit_code => 0}); } -if ($anvil->DRBD->gather_data()) +if ($anvil->DRBD->gather_data({debug => 2})) { # DRBD not found or configured. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "scan_drbd_error_0001"}); diff --git a/share/words.xml b/share/words.xml index 368f2463..cd618daf 100644 --- a/share/words.xml +++ b/share/words.xml @@ -454,6 +454,53 @@ Failed to parse the XML in the new definition file. The error was: Giving up. + This must be run on a node active in the cluster hosting the server being managed. Exiting. + This Anvil! does not seem to have a DR host. Exiting. + Failed to find an IP we can access the DR host: [#!variable!host_name!#]. Has it been configured? Is it running? Exiting. + Failed to access the DR host: [#!variable!host_name!#] using the IP: [#!variable!ip_address!#]. Is it running? Exiting. + Failed to parse the CIB. Is this node in the cluster? Exiting. + We're not a full member of the cluster yet. Please try again once we're fully in. Exiting. + We can't setup a server to be protected unless both nodes are up, and the peer isn't at this time. Exiting. + We can't remove a server from DR unless both nodes are up, and the peer isn't at this time. Exiting. + '. 
'. Exiting.]]> + Failed to find the server: [#!variable!server!#] by name or UUID? Exiting. + The protocol: [#!variable!protocol!#] is invalid. Please use '--help' for more information. + The DR host: [#!variable!host_name!#] doesn't appear to be in the storage group: [#!variable!storage_group!#]. Unable to proceed. + We need: [#!variable!space_needed!# (#!variable!space_needed_bytes!# Bytes)] from the storage group: [#!variable!storage_group!#], but only: [#!variable!space_on_dr!# (#!variable!space_on_dr_bytes!# bytes)] is available on DR. Unable to proceed. + [ Error ] - The check appears to have failed. Expected a return code of '0', but got: [#!variable!return_code!#] +The output, if any, was +==== +#!variable!output!# +==== + + - Restoring the old config now. + - The problematic new config has been saved as: [#!variable!file!#]. + - The old config has been restored. Exiting. + - The logical volume: [#!variable!lv_path!#] creation failed. Unable to proceed. + Only the root user can load a database file and start the database. + [ Error ] - The 'pg_dump' call to backup the database failed. Expected a return code of '0', but got: [#!variable!return_code!#]. +Full command called: [#!variable!shell_call!#] +The output, if any, was +==== +#!variable!output!# +==== + + Only the root user can backup a database. + [ Error ] - The 'dropdb' call to drop the database failed. Expected a return code of '0', but got: [#!variable!return_code!#]. +Full command called: [#!variable!shell_call!#] +The output, if any, was +==== +#!variable!output!# +==== + + [ Error ] - The 'createdb' call to create the database failed. Expected a return code of '0', but got: [#!variable!return_code!#]. +Full command called: [#!variable!shell_call!#] +The output, if any, was; +==== +#!variable!output!# +==== + + Failed to load the database file: [#!variable!file!#]. Deleting it so it's not considered in the next load attempt. 
@@ -693,6 +740,41 @@ sys::manage::firewall = 1 # This is the "short list" of servers shown when provisioning a new server. To see the full list of options, # run '/usr/bin/osinfo-query os' on any machine in the Anvil!. #sys::servers::os_short_list = debian10,fedora32,freebsd12.1,gentoo,macosx10.7,msdos6.22,openbsd6.7,opensuse15.2,rhel5.11,rhel6.10,rhel7.9,rhel8.3,sles12sp5,solaris11,ubuntu20.04,win10,win2k16,win2k19 +]]> + + + + @@ -1143,13 +1225,48 @@ It should be provisioned in the next minute or two. Loading the new corosync config exited with return code: [#!variable!return_code!#] and output: [#!variable!output!#] Manage VNC Pipes Perform VNC pipe operation [#!variable!operation!#] for server UUID [#!variable!server_uuid!#] from host UUID [#!variable!host_uuid!#]. - Manage a server menu: * Please enter the name of the server you want to manage -=] Servers available to manage on the Anvil! [#!variable!anvil_name!#] [=- -=] Managing the server: [#!variable!server_name!#] on the Anvil!: [#!variable!anvil_name!#] Get Server VM Screenshot Fetch a screenshot of the specified server VM and represent it as a Base64 string. + Running sanity checks. + Sanity checks complete! + Beginning to protect the server: [#!variable!server!#]! + Verified that there is enough space on DR to proceed. +* The connection protocol will be: ..... [#!variable!protocol!#] +* Node 1 to DR will use TCP port: ...... [#!variable!node1_to_dr_port!#] +* Node 2 to DR will use TCP port: ...... [#!variable!node2_to_dr_port!#] +* We will update the DRBD resource file: [#!variable!config_file!#] +The following LV(s) will be created: + + - Resource: [#!variable!resource!#], Volume: [#!variable!volume!#] + - The LV: [#!variable!lv_path!#] with the size: [#!variable!lv_size!# (#!variable!lv_size_bytes!# Bytes)] will be created. + The resource file: [#!variable!file!#] doesn't need to be updated. + - Backed up old config as: [#!variable!backup_file!#]. Updating it now. + - Updated! Verifying... 
+ - The new config looks good! + - Updating the peers now... + - Updating the resource file: [#!variable!file!#] on the host: [#!variable!host_name!#] via IP: [#!variable!ip_address!#]. + - Creating logical volumes on DR, if needed. New LVs will have metadata created. + - Volume: [#!variable!volume!#], logical volume: [#!variable!lv_path!#]. + - The logical volume: [#!variable!lv_path!#] already exists, skipping it, and NOT create DRBD meta data. + - Reloading the local DRBD resource config. + - Reloading the resource: [#!variable!server!#] on the host: [#!variable!host_name!#]. + - Checking, and starting where needed, the: [#!variable!server!#] resource locally and on peers. + - Checking locally. + - Checking the host: [#!variable!host_name!#] + - Checking to see if the DR host has connected to this resource yet. + - Not up yet, will check again at: [#!variable!next_check!#]. + - Up! + Done! The server: [#!variable!server!#] is now being protected on DR! +It will take time for it to initialize, please be patient. + - Running the scan agent 'scan-drbd' locally to record the newly used TCP ports. + - Running the scan agent 'scan-drbd' on: [#!variable!host_name!#] to record the newly used TCP ports. + The job has been recorded with the UUID: [#!variable!job_uuid!#], it will start in just a moment if anvil-daemon is running. + Manage DR tasks for a given server + This job can protect, remove (unprotect), connect, disconnect or update (connect, sync, disconnect) a given server. Starting: [#!variable!program!#]. @@ -1229,7 +1346,7 @@ Connecting to Database with configuration ID: [#!variable!uuid!#] users_home() was asked to find the home directory for the user: [#!variable!user!#], but was unable to do so.]]> SSH session opened without a password to: [#!variable!target!#]. 
#!variable!name!#] with the UUID: [#!variable!uuid!#] did not respond to pings and 'database::#!variable!uuid!#::ping' is not set to '0' in '#!data!path::configs::anvil.conf!#', skipping it.]]> - [ Warning ] - The database: [#!variable!name!#] on host: [#!variable!host!#] with UUID: [#!variable!uuid!#] can not be used, skipping it. + [ Note ] - The database: [#!variable!name!#] on host: [#!variable!host!#] with UUID: [#!variable!uuid!#] is not available, skipping it. The database connection error was: ---------- @@ -1900,6 +2017,20 @@ The file: [#!variable!file!#] needs to be updated. The difference is: #!variable!program!# is disabled in anvil.conf. and '--force' was not used. Exiting. [ Note ] - The network interface: [#!variable!name!#] with 'network_interface_uuid': [#!variable!uuid!#] is a duplicate, removing it from the database(s). [ Note ] - Managing /etc/hosts has been disabled. + [ Note ] - The Anvil!: [#!variable!anvil_name!#]'s storage group: [#!variable!storage_group!#] didn't have an entry for the host: [#!variable!host_name!#]. The volume group: [#!variable!vg_internal_uuid!#] is a close fit and not in another storage group, so adding it to this storage group now. + [ Note ] - We're a Striker and we did not connect to a peer's database. Will check now if we can load a recent backup, then start postgres locally (with or without a load). + Evaluating the dump file: [#!variable!full_path!#]. + The database host UUID: [#!variable!host_uuid!#] is not configured here, ignoring: [#!variable!full_path!#]. + We created the database dump file: [#!variable!full_path!#], will compare its modified time to other dumps we may find. + The database was dumped to: [#!variable!file!#] in: [#!variable!took!#] second(s). The size of the dump file is: [#!variable!size!#] (#!variable!size_bytes!# bytes). + The database was loaded successfully from the file: [#!variable!file!#] in: [#!variable!took!#] second(s)! 
+ No databases were available, so we will become primary after loading: [#!variable!file!#], which is: [#!variable!size!#] (#!variable!size_bytes!# bytes). Please be patient, this could take a moment. + The database was loaded, clear it and other DB dumps out now so that they don't get reloaded again in the future. + Sync'ed the file: [#!variable!file!#] to the peer Striker: [#!variable!host_name!#]. The sync took: [#!variable!took!#] seconds, and the file was: [#!variable!size!#] (#!variable!size_bytes!# bytes). + We're going to shut down our database. Creating a backup first. + Stopped the postgresql daemon as a peer is currently primary. + Our most recent database dump is newer than any from our peers. As such, we'll just start the database without a load. + Retrying to connect to the database. The host name: [#!variable!target!#] does not resolve to an IP address. @@ -2261,7 +2392,8 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty Preparing to get server VM screenshot. Finished getting server VM screenshot. Failed to get server VM screenshot; got non-zero return code. - Finished attempting to get server VM screenshot; no operations happened because requirements not met. + Finished attempting to get server VM screenshot; no operations happened because requirements not met.>>> master + Preparing to manage DR for a server. Saved the mail server information successfully! @@ -2922,6 +3054,7 @@ The error was: We will sleep a bit and try again. + [ Warning ] - The storage group: [#!variable!storage_group_name!#] had the host: [#!variable!host_name!#] as a member. This host is not a member (anymore?) of the Anvil!: [#!variable!anvil_name!#]. Removing it from the storage group now. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 7aec9e63..e6c0f4da 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -171,19 +171,23 @@ my $delay = set_delay($anvil); # Once a minute, we'll check the md5sums and see if we should restart. 
# Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards). -$anvil->data->{timing}{minute_checks} = 60; -$anvil->data->{timing}{daily_checks} = 86400; -$anvil->data->{timing}{repo_update_interval} = 86400; -$anvil->data->{timing}{next_minute_check} = $now_time - 1; -$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1; +$anvil->data->{timing}{minute_checks} = 60; +$anvil->data->{timing}{ten_minute_checks} = 600; +$anvil->data->{timing}{daily_checks} = 86400; +$anvil->data->{timing}{repo_update_interval} = 86400; +$anvil->data->{timing}{next_minute_check} = $now_time - 1; +$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1; +$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks}, - "s2:timing::daily_checks" => $anvil->data->{timing}{daily_checks}, - "s3:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval}, - "s4:now_time" => $now_time, - "s5:delay" => $delay, - "s6:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, - "s7:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, + "s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks}, + "s2:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks}, + "s3:timing::daily_checks" => $anvil->data->{timing}{daily_checks}, + "s4:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval}, + "s5:now_time" => $now_time, + "s6:delay" => $delay, + "s7:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, + "s8:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, + "s9:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, }}); # Disconnect. 
We'll reconnect inside the loop @@ -374,26 +378,26 @@ sub check_network # The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such, # we'll start watching the network after the uptime is 2 minutes. my $uptime = $anvil->Get->uptime; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }}); if ($uptime > 120) { # Check that bonds are up. Degraded bonds will be left alone. if (not $anvil->data->{sys}{network}{initial_checks}) { my $running = $anvil->System->check_daemon({daemon => "NetworkManager"}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }}); if (not $running) { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "message_0250", variables => { daemon => "NetworkManager" }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }}); my $return_code = $anvil->System->start_daemon({daemon => "NetworkManager"}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }}); } #$anvil->Network->check_network({heal => "all"}); $anvil->data->{sys}{network}{initial_checks} = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks}, }}); } @@ -418,10 +422,11 @@ sub handle_periodic_tasks my $now_time = time; my 
$type = $anvil->Get->host_type(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "s1:now_time" => $now_time, - "s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, - "s3:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, - "s4:type" => $type, + "s1:now_time" => $now_time, + "s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, + "s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, + "s4:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, + "s5:type" => $type, }}); # Time to run once per minute tasks. @@ -503,6 +508,126 @@ sub handle_periodic_tasks $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); } + # Now check to see if it's time to run less frequent tasks. + if ($now_time >= $anvil->data->{timing}{next_ten_minute_check}) + { + my $host_type = $anvil->Get->host_type(); + my $host_uuid = $anvil->Get->host_uuid(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + host_type => $host_type, + host_uuid => $host_uuid, + }}); + + # Are we a Striker and is there two or more connections? If so, evaluate if we should shut down our + # database. + if ($host_type eq "striker") + { + if ($anvil->data->{sys}{database}{connections} > 1) + { + # Sort by UUID, skip the first, and see if we're one of the others. + my $first_uuid = ""; + foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }}); + if (not $first_uuid) + { + $first_uuid = $uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }}); + } + elsif ($uuid eq $host_uuid) + { + # This is us, backup and shut down. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"}); + + # Switch the read_uuid and then close + $anvil->data->{sys}{database}{read_uuid} = $first_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }}); + + # Disconnect + $anvil->data->{cache}{database_handle}{$uuid}->disconnect; + delete $anvil->data->{cache}{database_handle}{$uuid}; + + # Create a backup, this is useful also for setting the mtime of the last time + # we were up. + my $dump_file = $anvil->Database->backup_database({debug => 3}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }}); + + # Stop the daemon + my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }}); + if ($return_code eq "0") + { + # Stopped the daemon. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"}); + } + } + } + } + + # If we're the active database, dump out database out and rsync it to our peers. + my $peers = keys %{$anvil->data->{database}}; + my $connections = $anvil->data->{sys}{database}{connections}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peers => $peers, + connections => $connections, + }}); + if (exists $anvil->data->{cache}{database_handle}{$host_uuid}) + { + # Verify that the database is up. + my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }}); + if ($running) + { + # Backup our DB. 
+ my $dump_file = $anvil->Database->backup_database({debug => 2}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }}); + + # Now rsync it to our peer(s) + foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}}) + { + next if $this_host_uuid eq $host_uuid; + + my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/"; + my $password = $anvil->data->{database}{$this_host_uuid}{password}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + this_host_uuid => $this_host_uuid, + destination => $destination, + password => $anvil->Log->is_secure($password), + }}); + + my $start_time = time; + my $failed = $anvil->Storage->rsync({ + debug => 3, + destination => $destination, + password => $password, + source => $dump_file, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }}); + + my $rsync_time = time - $start_time; + my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}}); + my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}}); + my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => { + file => $dump_file, + host_name => $target_name, + took => $rsync_time, + size => $size, + size_bytes => $size_bytes, + }}); + } + } + } + } + + # Update the next check time. 
+ $anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks}, + "s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, + }}); + } + # Now check to see if it's time to run daily tasks. if ($now_time >= $anvil->data->{timing}{next_daily_check}) { @@ -1030,6 +1155,16 @@ sub prep_database { my ($anvil) = @_; + # If there's a backup file, we're configured and possibly just off. + my $dump_file = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$anvil->Get->host_uuid().".out"; + $dump_file =~ s/\/\//\//g; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { dump_file => $dump_file }}); + if (-e $dump_file) + { + # No need to prepare. + return(0); + } + # Only run this if we're a dashboard. 
my $host_type = $anvil->Get->host_type(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }}); @@ -1368,7 +1503,7 @@ sub update_state_file #my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'}.$anvil->Log->switches; my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }}); my ($states_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { diff --git a/tools/anvil-manage-dr b/tools/anvil-manage-dr new file mode 100755 index 00000000..e84de9df --- /dev/null +++ b/tools/anvil-manage-dr @@ -0,0 +1,1597 @@ +#!/usr/bin/perl +# +# This manages if a server is backed up to a DR host or not. When enabled, it can start or stop replication. +# +# NOTE: Unlike most jobs, this one will directly work on the peer node and the DR host using SSH connections. +# This behaviour is likely to change later as it's not ideal. +# +# Exit codes; +# 0 = Normal exit. +# 1 = Any problem that causes an early exit. +# + +use strict; +use warnings; +use Anvil::Tools; +require POSIX; +use Term::Cap; +use Text::Diff; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. 
+$| = 1;
+
+my $anvil = Anvil::Tools->new();
+
+# Default every supported switch to an empty string before the command line switches are read.
+$anvil->data->{switches}{'connect'}  = "";	# connect an existing DR resource
+$anvil->data->{switches}{disconnect} = "";	# disconnect
+$anvil->data->{switches}{'job-uuid'} = "";	# Used later
+$anvil->data->{switches}{protect}    = "";	# Set
+$anvil->data->{switches}{protocol}   = "";	# "sync", "async" or "long-throw"
+$anvil->data->{switches}{remove}     = "";	# Set
+$anvil->data->{switches}{server}     = "";	# Name or UUID
+$anvil->data->{switches}{update}     = "";	# connects, if needed, and disconnects once UpToDate
+$anvil->data->{switches}{Yes}        = "";	# Set to avoid confirmation, not case sensitive
+$anvil->Get->switches;
+$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
+$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+	'switches::connect'    => $anvil->data->{switches}{'connect'},
+	'switches::disconnect' => $anvil->data->{switches}{disconnect},
+	'switches::job-uuid'   => $anvil->data->{switches}{'job-uuid'},
+	'switches::protect'    => $anvil->data->{switches}{protect},
+	'switches::protocol'   => $anvil->data->{switches}{protocol},
+	'switches::remove'     => $anvil->data->{switches}{remove},
+	'switches::server'     => $anvil->data->{switches}{server},
+	'switches::update'     => $anvil->data->{switches}{update},
+	'switches::Yes'        => $anvil->data->{switches}{Yes},
+}});
+
+$anvil->Database->connect();
+$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"});
+if (not $anvil->data->{sys}{database}{connections})
+{
+	# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
+	# again after we exit.
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0306"});
+	sleep 10;
+	$anvil->nice_exit({exit_code => 1});
+}
+
+# If we've got a job UUID, load the job details.
+if ($anvil->data->{switches}{'job-uuid'})
+{
+	load_job($anvil);
+}
+
+# When not run as a job, set up terminal handling and send the 'cl' capability string to the terminal.
+my $terminal = "";
+if (not $anvil->data->{switches}{'job-uuid'})
+{
+	# Explicit class-method calls are used instead of indirect object syntax ('new Class').
+	my $termios = POSIX::Termios->new;
+	$termios->getattr;
+	my $ospeed = $termios->getospeed;
+	$terminal = Term::Cap->Tgetent({ TERM => undef, OSPEED => $ospeed });
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { terminal => $terminal }});
+
+	$terminal->Trequire(qw/ce ku kd/);
+	print $terminal->Tputs('cl');
+}
+
+sanity_check($anvil, $terminal);
+
+do_task($anvil, $terminal);
+
+
+$anvil->nice_exit({exit_code => 0});
+
+
+#############################################################################################################
+# Functions                                                                                                 #
+#############################################################################################################
+
+# Dispatches the requested task. Only the '--protect' branch exists so far and it is an empty placeholder.
+# Always returns 0.
+sub do_task
+{
+	my ($anvil, $terminal) = @_;
+
+	# What task am I doing?
+	if ($anvil->data->{switches}{protect})
+	{
+
+	}
+
+	return(0);
+}
+
+# Validates the environment and the switches before any work is done. In order, it checks that; this host is
+# a node in an Anvil!, the Anvil! has a DR host, the DR host can be reached, the CIB can be parsed, this node
+# (and the peer, for protect/remove/protocol) is ready, the server was found by name or UUID, the protocol
+# is valid and that an action was requested. On any failure the error is logged, the job is marked failed
+# and the program exits with code 1. On success, process_protect() is called if '--protect' was used and 0
+# is returned.
+sub sanity_check
+{
+	my ($anvil, $terminal) = @_;
+
+	# Begin sanity checks
+	$anvil->Job->update_progress({
+		progress => 10,
+		message  => "job_0358",
+	});
+
+	# Are we a node or DR?
+	my $host_type  = $anvil->Get->host_type();
+	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		host_type  => $host_type,
+		anvil_uuid => $anvil_uuid,
+	}});
+
+	if (($host_type ne "node") or (not $anvil_uuid))
+	{
+		# This must be run on a node active in the cluster hosting the server being managed.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0332"});
+		$anvil->Job->update_progress({
+			progress   => 100,
+			message    => "error_0332",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Get the Anvil! details.
+	$anvil->Database->get_hosts();
+	$anvil->Database->get_anvils();
+	$anvil->Database->get_storage_group_data();
+
+	# Does this Anvil! have a DR node?
+	if (not $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid})
+	{
+		# This Anvil! does not seem to have a DR host. Exiting.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "error_0333"});
+		$anvil->Job->update_progress({
+			progress   => 100,
+			message    => "error_0333",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Can we access DR?
+	my $password      = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
+	my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid};
+	my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name};
+	my $dr_ip         = $anvil->System->find_matching_ip({
+		debug => 2,
+		host  => $dr1_host_name,
+	});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		password      => $anvil->Log->is_secure($password),
+		dr1_host_uuid => $dr1_host_uuid,
+		dr1_host_name => $dr1_host_name,
+		dr_ip         => $dr_ip,
+	}});
+	if ((not $dr_ip) or ($dr_ip eq "!!error!!"))
+	{
+		# Failed to find an IP we can access the DR host. Has it been configured? Is it running? Exiting.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "error_0334", variables => { host_name => $dr1_host_name }});
+		$anvil->Job->update_progress({
+			progress   => 0,
+			message    => "error_0334,!!host_name!".$dr1_host_name."!!",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Test access.
+	my $access = $anvil->Remote->test_access({
+		target   => $dr_ip,
+		password => $password,
+	});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
+	if (not $access)
+	{
+		# Failed to access the DR host. Is it running? Exiting.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0335", variables => {
+			host_name  => $dr1_host_name,
+			ip_address => $dr_ip,
+		}});
+		$anvil->Job->update_progress({
+			progress   => 0,
+			message    => "error_0335,!!host_name!".$dr1_host_name."!!,!!ip_address!".$dr_ip."!!",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Can we parse the CIB?
+	my ($problem) = $anvil->Cluster->parse_cib();
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
+	if ($problem)
+	{
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0336"});
+		$anvil->Job->update_progress({
+			progress   => 0,
+			message    => "error_0336",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Both nodes need to be in the cluster, are they?
+	if (not $anvil->data->{cib}{parsed}{'local'}{ready})
+	{
+		# We're not a full member of the cluster yet. Please try again once we're fully in. Exiting.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0337"});
+		$anvil->Job->update_progress({
+			progress   => 0,
+			message    => "error_0337",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	### TODO: We can queue a job to update the peer later, there's no real need, in the long run, for the
+	###       peer to be online.
+	# If we're protecting or removing a server from DR, the peer needs to be up.
+	if ((($anvil->data->{switches}{protect}) or
+	     ($anvil->data->{switches}{remove}) or
+	     ($anvil->data->{switches}{protocol})) &&
+	    (not $anvil->data->{cib}{parsed}{peer}{ready}))
+	{
+		if ($anvil->data->{switches}{protect})
+		{
+			# We can't setup a server to be protected unless both nodes are up, and the peer
+			# isn't at this time.
+			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0338"});
+			$anvil->Job->update_progress({
+				progress   => 0,
+				message    => "error_0338",
+				job_status => "failed",
+			});
+		}
+		else
+		{
+			# We can't remove a server from DR unless both nodes are up, and the peer isn't at
+			# this time.
+			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0339"});
+			$anvil->Job->update_progress({
+				progress   => 0,
+				message    => "error_0339",
+				job_status => "failed",
+			});
+		}
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Verify we found the server.
+	$anvil->data->{server}{'server-name'} = "";
+	$anvil->data->{server}{'server-uuid'} = "";
+	$anvil->data->{server}{'anvil-uuid'}  = $anvil_uuid;
+	if (not $anvil->data->{switches}{server})
+	{
+		# Please specify the server to manage using '--server <name or UUID>'.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0340"});
+		$anvil->Job->update_progress({
+			progress   => 100,
+			message    => "error_0340",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+	else
+	{
+		my $server = $anvil->data->{switches}{server};
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server => $server }});
+
+		# The switch may hold either a server UUID or a server name; try both lookups.
+		$anvil->Database->get_servers();
+		if (exists $anvil->data->{servers}{server_uuid}{$server})
+		{
+			$anvil->data->{server}{'server-uuid'} = $server;
+			$anvil->data->{server}{'server-name'} = $anvil->data->{servers}{server_uuid}{$server}{server_name};
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+				'server::server-uuid' => $anvil->data->{server}{'server-uuid'},
+				'server::server-name' => $anvil->data->{server}{'server-name'},
+			}});
+		}
+		if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server})
+		{
+			$anvil->data->{server}{'server-name'} = $server;
+			$anvil->data->{server}{'server-uuid'} = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server}{server_uuid};
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+				'server::server-uuid' => $anvil->data->{server}{'server-uuid'},
+				'server::server-name' => $anvil->data->{server}{'server-name'},
+			}});
+		}
+	}
+
+	# Get and parse the server's definition to find the DRBD devices.
+	if ((not $anvil->data->{server}{'server-uuid'}) or (not $anvil->data->{server}{'server-name'}))
+	{
+		# Failed to find the server by name or UUID.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0341", variables => { server => $anvil->data->{switches}{server} }});
+		$anvil->Job->update_progress({
+			progress   => 100,
+			message    => "error_0341,!!server!".$anvil->data->{switches}{server}."!!",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	if (not $anvil->data->{switches}{protocol})
+	{
+		$anvil->data->{switches}{protocol} = "async";
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+			'switches::protocol' => $anvil->data->{switches}{protocol},
+		}});
+	}
+	elsif (($anvil->data->{switches}{protocol} ne "sync") &&
+	       ($anvil->data->{switches}{protocol} ne "async") &&
+	       ($anvil->data->{switches}{protocol} ne "long-throw"))
+	{
+		# The protocol is invalid. Please use '--help' for more information.
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0342", variables => { protocol => $anvil->data->{switches}{protocol} }});
+		# The job message must use the same key that was just logged (error_0342).
+		$anvil->Job->update_progress({
+			progress   => 100,
+			message    => "error_0342,!!protocol!".$anvil->data->{switches}{protocol}."!!",
+			job_status => "failed",
+		});
+		$anvil->nice_exit({exit_code => 1});
+	}
+
+	# Are we being asked to actually do something?
+	if (((not $anvil->data->{switches}{'connect'}) &&
+	     (not $anvil->data->{switches}{disconnect}) &&
+	     (not $anvil->data->{switches}{protect}) &&
+	     (not $anvil->data->{switches}{remove}) &&
+	     (not $anvil->data->{switches}{update})) or
+	    ($anvil->data->{switches}{help}) or
+	    ($anvil->data->{switches}{h}))
+	{
+		print "
+What do you want to do?
+
+Options (all require --server );
+
+ --connect
+
+ Connect a server already on DR to it's DR copy, update the data there if needed and begin streaming
+ replication.
+
+ --disconnect
+
+ Disconnect a server from the DR image. This will end streaming replication.
+
+ --protect
+
+ The sets up the server to be imaged on DR, if it isn't already protected.
+
+ Notes: If the server is not running, the DRBD resource volume(s) will be brought up. Both nodes need
+ to be online and in the cluster.
+
+ --protocol , default 'async'
+
+ This allows the protocol used to replicate data to the DR host to be configured. By default, 'async'
+ is used.
+
+ Modes:
+
+ async (default)
+
+ This tells the storage layer to consider the write to be completed once the data is on the
+ active node's network transmit buffer. In this way, the DR host is allowed to fall behind a
+ small amount, but the active nodes will not slow down because of higher network transit times
+ to the DR location.
+
+ NOTE: The transmit (TX) buffer size can be checked / updated with 'ethtool -g '.
+ If the transmit buffer fills, storage will hold until the buffer flushes, causing
+ periodic storage IO waits. You can increase the buffer size to a certain degree with
+ 'ethtool -G tx ' (set on all storage network link devices on both
+ nodes. For more information, see:
+
+ https://www.linuxjournal.com/content/queueing-linux-network-stack
+
+ or
+
+ https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/monitoring-and-tuning-the-rx-ring-buffer_configuring-and-managing-networking
+
+ If you set the maximum transmit buffer size and still run into IO waits, consider
+ 'long-throw'.
+
+ sync
+
+ This tells the storage layer to consider the write complete when the data has reached the DR
+ host's storage (when the data is committed to disk on DR). This means that the DR host will
+ never fall behind. However, if the DR's network latency is higher or the bandwidth to the DR
+ is lower than that of the latency/bandwidth between the nodes, then total storage performance
+ will be reduced to DR network speeds while DR is connected.
+
+ This should be tested before implemented in production.
+
+ long-throw
+
+ This is an option that requires an additional license fee to use.
+
+ This option (based on LINBIT's DRBD Proxy) and is designed for DR hosts that are connected
+ over a wide-area network (or other cases where the connection to the DR is high-latency, low
+ bandwidth or intermittently interrupted). It uses RAM on the host to act, effectively, as a
+ very large transmit buffer. This requires allocating host RAM to the task, and so could
+ reduces the available RAM assignable to assign to servers.
+
+ In this mode, the DR host is allowed to fall further behind production, but it significantly
+ reduces (hopefully eliminates) how often node replication waits because of a full transmit
+ buffer.
+
+ The default size is 16 MiB, with a maximum size of 16 GiB. When the size is set to over
+ 1 GiB, the size allocated to this buffer is accounted for when calculating available RAM that
+ can be assigned to hosted servers.
+
+ --remove
+
+ This removes the DR image from the DR host for the server, freeing up space on DR but removing the
+ protection afforded by DR.
+
+ --update
+
+ This tells the DR to be connected and sync, Once the volume(s) on DR are 'UpToDate', the connection
+ is closed. This provides a point in time update of the server's image on DR.
+
+ --Yes
+
+ Note the capital 'Y'. This can be set to proceed without confirmation. Use carefully with '--protect'
+ and '--remove'! If the '--job-uuid' is set, this is assumed and no prompt will be presented.
+
+Exiting.
+";
+		# Asking for help is a normal exit; reaching here with no action requested is an error.
+		if (($anvil->data->{switches}{help}) or ($anvil->data->{switches}{h}))
+		{
+			$anvil->nice_exit({exit_code => 0});
+		}
+		else
+		{
+			$anvil->nice_exit({exit_code => 1});
+		}
+	}
+
+	# Sanity checks complete!
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0359"});
+	$anvil->Job->update_progress({
+		progress => 20,
+		message  => "job_0359",
+	});
+
+	# If we're protecting, make sure there's enough space on the DR host.
+	if ($anvil->data->{switches}{protect})
+	{
+		process_protect($anvil, $terminal);
+	}
+
+	return(0);
+}
+
+sub process_protect
+{
+	my ($anvil, $terminal) = @_;
+
+	# Parse out the DRBD resource's backing the server and get their LV sizes.
+ $anvil->Database->get_server_definitions(); + my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); + my $anvil_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name}; + my $node1_short_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{short_host_name}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name}; + my $node2_short_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{short_host_name}; + my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name}; + my $dr1_short_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{short_host_name}; + my $server_name = $anvil->data->{server}{'server-name'}; + my $server_uuid = $anvil->data->{server}{'server-uuid'}; + my $short_host_name = $anvil->Get->short_host_name(); + my $server_definition_xml = $anvil->data->{server_definitions}{server_definition_server_uuid}{$server_uuid}{server_definition_xml}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + anvil_uuid => $anvil_uuid, + anvil_password => $anvil->Log->is_secure($anvil_password), + node1_host_uuid => $node1_host_uuid, + node1_host_name => $node1_host_name, + node1_short_host_name => $node1_short_host_name, + node2_host_uuid => $node2_host_uuid, + node2_host_name => $node2_host_name, + node2_short_host_name => $node2_short_host_name, + dr1_host_uuid => $dr1_host_uuid, + dr1_host_name => $dr1_host_name, + dr1_short_host_name => $dr1_short_host_name, + server_name => $server_name, + server_uuid => $server_uuid, + server_definition_xml => $server_definition_xml, + 
short_host_name => $short_host_name, + }}); + + # Sanity checks complete! + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0360", variables => { server => $anvil->data->{server}{'server-name'} }}); + $anvil->Job->update_progress({ + progress => 30, + message => "job_0360,!!server!".$anvil->data->{server}{'server-name'}."!!", + }); + + $anvil->Server->parse_definition({ + debug => 2, + host => $short_host_name, + server => $anvil->data->{server}{'server-name'}, + source => "from_db", + definition => $server_definition_xml, + }); + + $anvil->DRBD->gather_data({debug => 2}); + + my $server_ram = $anvil->data->{server}{$short_host_name}{$server_name}{'from_db'}{memory}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_ram => $anvil->Convert->add_commas({number => $server_ram})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram}).")", + }}); + foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$short_host_name}{$server_name}{drbd}{resource}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }}); + + foreach my $this_host_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}}) + { + my $this_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $this_host_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + this_host_name => $this_host_name, + this_host_uuid => $this_host_uuid, + }}); + + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}}) + { + # Always get the LV sizes, as that factors metadata. DRBD size is + # minus metadata, and 0 when down. 
+ my $device_path = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_path}; + my $backing_disk = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{backing_disk}; + my $device_minor = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_minor}; + my $tcp_port = $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:volume" => $volume, + "s2:device_path" => $device_path, + "s3:backing_disk" => $backing_disk, + "s4:device_minor" => $device_minor, + }}); + + my $this_size = $anvil->Storage->get_size_of_block_device({debug => 2, host_uuid => $this_host_uuid, path => $backing_disk}); + if ($this_size eq "") + { + # DRBD config file was updated, but LV not created yet. + next; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + this_size => $anvil->Convert->add_commas({number => $this_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $this_size}).")", + }}); + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + this_size => $anvil->Convert->add_commas({number => $this_size})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $this_size}).")", + }}); + + if ((not exists $anvil->data->{server}{drbd}{$resource}{$volume}{size}) or (not $anvil->data->{server}{drbd}{$resource}{$volume}{size})) + { + $anvil->data->{server}{drbd}{$resource}{$volume}{size} = $this_size; + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::drbd::${resource}::${volume}::size" => $anvil->data->{server}{drbd}{$resource}{$volume}{size}, + }}); + } + + if (not exists $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid}) + { + $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid} = ""; + } + + ### NOTE: This check make sense only under the assumption that the DRBD minor + ### is common across both nodes. This should be the case, but doesn't + ### strictly have to be so. + if ((not exists $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number}) or + (not defined $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number}) or + ($anvil->data->{server}{drbd}{$resource}{$volume}{minor_number} eq "")) + { + $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number} = $device_minor; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::drbd::${resource}::${volume}::minor_number" => $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number}, + }}); + } + + if ((not exists $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port}) or + (not defined $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port}) or + ($anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port} eq "")) + { + $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port} = $tcp_port; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::drbd::${resource}::${volume}::tcp_port" => $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port}, + }}); + } + + # What storage group does this belong to? 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::drbd::${resource}::${volume}::storage_group_uuid" => $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid}, + }}); + if (not $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid}) + { + my $storage_key = $resource."/".$volume; + my $storage_group_uuid = $anvil->Storage->get_storage_group_from_path({ + debug => 2, + anvil_uuid => $anvil_uuid, + path => $backing_disk, + }); + my $storage_group_name = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{group_name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + storage_key => $storage_key, + storage_group_uuid => $storage_group_uuid, + storage_group_name => $storage_group_name, + }}); + + # We'll need to sum up the volumes on each storage group, as + # it's possible the volumes are on different SGs. + $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid} = $storage_group_uuid; + $anvil->data->{server}{storage_groups}{$storage_group_name}{used_by}{$storage_key} = 1; + $anvil->data->{server}{storage_groups}{$storage_group_name}{storage_group_uuid} = $storage_group_uuid; + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::drbd::${resource}::${volume}::storage_group_uuid" => $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid}, + "server::storage_groups::${storage_group_name}::used_by::${storage_key}" => $anvil->data->{server}{storage_groups}{$storage_group_name}{used_by}{$storage_key}, + "server::storage_groups::${storage_group_name}::storage_group_uuid" => $anvil->data->{server}{storage_groups}{$storage_group_name}{storage_group_uuid}, + }}); + } + + if ($this_size > $anvil->data->{server}{drbd}{$resource}{$volume}{size}) + { + $anvil->data->{server}{drbd}{$resource}{$volume}{size} = $this_size; + $anvil->Log->variables({source 
=> $THIS_FILE, line => __LINE__, level => 2, list => { + "server::drbd::${resource}::${volume}::size" => $anvil->Convert->add_commas({number => $anvil->data->{server}{drbd}{$resource}{$volume}{size}})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{server}{drbd}{$resource}{$volume}{size}}).")", + }}); + } + } + } + } + + # Make sure there is enough space on DR for the volumes under this VM. + my $problem = 0; + my $config_file = ""; + foreach my $storage_group_name (sort {$a cmp $b} keys %{$anvil->data->{server}{storage_groups}}) + { + my $storage_group_uuid = $anvil->data->{server}{storage_groups}{$storage_group_name}{storage_group_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + storage_group_name => $storage_group_name, + storage_group_uuid => $storage_group_uuid, + }}); + + # First, is this SG on DR? + if (not exists $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$dr1_host_uuid}) + { + # The DR host doesn't appear to be storage group. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0343", variables => { + host_name => $dr1_host_name, + storage_group => $$storage_group_name, + }}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0343,!!host_name!".$dr1_host_name."!!,!!storage_group!".$$storage_group_name."!!", + }); + $problem = 1; + } + + my $space_needed = 0; + foreach my $resource_key (sort {$a cmp $b} keys %{$anvil->data->{server}{storage_groups}{$storage_group_name}{used_by}}) + { + my ($resource, $volume) = ($resource_key =~ /^(.*)\/(\d+)$/); + my $volume_size = $anvil->data->{server}{drbd}{$resource}{$volume}{size}; + $space_needed += $volume_size, + $config_file = $anvil->data->{new}{resource}{$resource}{config_file}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + config_file => $config_file, + resource_key => $resource_key, + resource => $resource, + volume => $volume, + volume_size => $anvil->Convert->add_commas({number => $volume_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $volume_size}).")", + space_needed => $anvil->Convert->add_commas({number => $space_needed})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $space_needed}).")", + }}); + + } + + # Is there enough space on DR? + my $space_on_dr = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$dr1_host_uuid}{vg_free}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + space_on_dr => $anvil->Convert->add_commas({number => $space_on_dr})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $space_on_dr}).")", + space_needed => $anvil->Convert->add_commas({number => $space_needed})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $space_needed}).")", + }}); + if ($space_needed > $space_on_dr) + { + my $variables = { + space_needed => $anvil->Convert->bytes_to_human_readable({'bytes' => $space_needed}), + space_needed_bytes => $anvil->Convert->add_commas({number => $space_needed}), + storage_group => $storage_group_name, + space_on_dr => $anvil->Convert->bytes_to_human_readable({'bytes' => $space_on_dr}), + space_on_dr_bytes => $anvil->Convert->add_commas({number => $space_on_dr}), + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0344", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0344", + variables => $variables + }); + $problem = 1; + } + } + if ($problem) + { + $anvil->nice_exit({exit_code => 1}); + } + + # Get net next pair of TCP ports. + my (undef, $tcp_ports) = $anvil->DRBD->get_next_resource({ + debug => 2, + dr_tcp_ports => 1, + }); + my ($node1_to_dr_port, $node2_to_dr_port) = split/,/, $tcp_ports; + + # Show what we're doing + my $variables = { + protocol => $anvil->data->{switches}{protocol}, + node1_to_dr_port => $node1_to_dr_port, + node2_to_dr_port => $node2_to_dr_port, + config_file => $config_file, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0361", variables => $variables}); + $anvil->Job->update_progress({ + progress => 40, + message => "job_0361", + variables => $variables, + }); + foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}}) + { + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$resource}}) + { + my $variables = { + resource => $resource, + volume => $volume, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0362", variables => $variables}); + $anvil->Job->update_progress({ + progress => 43, + message => "job_0362", + variables => 
$variables, + }); + my $lv_size = $anvil->data->{server}{drbd}{$resource}{$volume}{size}; + my $storage_group_uuid = $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid}; + my $dr_lv_name = $resource."_".$volume; + my $dr_vg_name = $anvil->Storage->get_vg_name({ + debug => 3, + storage_group_uuid => $storage_group_uuid, + host_uuid => $dr1_host_uuid, + }); + my $dr_lv_path = "/dev/".$dr_vg_name."/".$dr_lv_name; + my $extent_size = $anvil->data->{storage_groups}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$dr1_host_uuid}{vg_extent_size}; + my $extent_count = int($lv_size / $extent_size); + my $shell_call = $anvil->data->{path}{exe}{lvcreate}." -l ".$extent_count." -n ".$dr_lv_name." ".$dr_vg_name." -y"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s01:resource" => $resource, + "s02:volume" => $volume, + "s03:lv_size" => $anvil->Convert->add_commas({number => $lv_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size}).")", , + "s04:storage_group_uuid" => $storage_group_uuid, + "s05:dr_lv_name" => $dr_lv_name, + "s06:dr_vg_name" => $dr_vg_name, + "s07:dr_lv_path" => $dr_lv_path, + "s08:extent_size" => $anvil->Convert->add_commas({number => $extent_size})." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $extent_size}).")", + "s09:extent_count" => $extent_count, + "s10:shell_call" => $shell_call, + }}); + + $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lvcreate_call} = $shell_call; + $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lv_path} = $dr_lv_path; + $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{storage_group_uuid} = $storage_group_uuid; + $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_tcp_port} = $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port}; + $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_minor} = $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::dr::volumes::${resource}::${volume}::lvcreate_call" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lvcreate_call}, + "server::dr::volumes::${resource}::${volume}::lv_path" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lv_path}, + "server::dr::volumes::${resource}::${volume}::storage_group_uuid" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{storage_group_uuid}, + "server::dr::volumes::${resource}::${volume}::drbd_tcp_port" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_tcp_port}, + "server::dr::volumes::${resource}::${volume}::drbd_minor" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_minor}, + }}); + + # Get the VG name that this volume will be created on. 
+ $variables = { + lv_path => $dr_lv_path, + lv_size => $anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size}), + lv_size_bytes => $anvil->Convert->add_commas({number => $lv_size}), + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0363", variables => $variables}); + $anvil->Job->update_progress({ + progress => 47, + message => "job_0363", + variables => $variables, + }); + } + } + + ### NOTE: 'Yes' is set when a job is picked up, so this won't re-register the job. + my $record_job = 0; + if (not $anvil->data->{switches}{Yes}) + { + # Ask the user to confirm. + print "\n".$anvil->Words->string({key => "message_0021"})."\n"; + my $answer = ; + chomp $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }}); + + if ($answer =~ /^y/i) + { + print $anvil->Words->string({key => "message_0175"})."\n"; + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + else + { + print $anvil->Words->string({key => "message_0022"})."\n"; + $anvil->nice_exit({exit_code => 0}); + } + } + elsif (not $anvil->data->{switches}{'job-uuid'}) + { + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + + if ($record_job) + { + my $job_data = "server=".$anvil->data->{switches}{server}."\n"; + $job_data .= "protect=1\n"; + $job_data .= "protocol=".$anvil->data->{switches}{protocol}."\n"; + + # Register the job with this host + my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ + debug => 2, + job_command => $anvil->data->{path}{exe}{'anvil-manage-dr'}.$anvil->Log->switches, + job_data => $job_data, + job_name => "server::dr", + job_title => "job_0384", + job_description => "job_0385", + job_progress => 0, + job_host_uuid => $anvil->Get->host_uuid, + }); + $anvil->Log->variables({source => 
$THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); + + # Report the job UUID. + print $anvil->Words->string({key => "job_0383", variables => { job_uuid => $job_uuid }})."\n"; + + + $anvil->nice_exit({exit_code => 0}); + } + + ### If we're still here, time to get started. + # Read in the old config and update it. + my $old_resource_config = $anvil->Storage->read_file({file => $config_file}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_resource_config => $old_resource_config }}); + + # Pull the data out of the old file + my $hosts = ""; + my $nodes_tcp_port = ""; + foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }}); + my $dr1_seen = 0; + foreach my $this_host_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { this_host_name => $this_host_name }}); + my $node_id = ""; + if (($this_host_name eq $node1_short_host_name) or ($this_host_name eq $node1_host_name)) + { + $node_id = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node_id => $node_id }}); + + if ((not $nodes_tcp_port) && + (exists $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}) && + ($anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port})) + { + $nodes_tcp_port = $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { nodes_tcp_port => $nodes_tcp_port }}); + } + } + elsif (($this_host_name eq $node2_short_host_name) or ($this_host_name eq $node2_host_name)) + { + $node_id = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node_id => $node_id }}); + + if 
((not $nodes_tcp_port) && + (exists $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}) && + ($anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port})) + { + $nodes_tcp_port = $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { nodes_tcp_port => $nodes_tcp_port }}); + } + } + elsif (($this_host_name eq $dr1_short_host_name) or ($this_host_name eq $dr1_host_name)) + { + $node_id = 2; + $dr1_seen = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + node_id => $node_id, + dr1_seen => $dr1_seen, + }}); + } + my $volumes = ""; + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$resource}}) + { + my $device_path = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_path}; + my $backing_disk = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{backing_disk}; + my $device_minor = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_minor}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:device_path" => $device_path, + "s2:backing_disk" => $backing_disk, + "s3:device_minor" => $device_minor, + }}); + + $volumes .= $anvil->Words->string({key => "file_0004", variables => { + volume => $volume, + drbd_path => $device_path, + minor => $device_minor, + lv_path => $backing_disk, + }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volumes => $volumes }}); + + # Record the DRBD device for adding DR. 
+ if (not exists $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path}) + { + $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path} = $device_path; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server::dr::volumes::${resource}::${volume}::device_path" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path}, + }}); + } + } + $hosts .= $anvil->Words->string({key => "file_0003", variables => { + short_host_name => $this_host_name, + node_id => $node_id, + volumes => $volumes, + }}); + } + if (not $dr1_seen) + { + # Inject the DR. + my $volumes = ""; + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{dr}{volumes}{$resource}}) + { + my $device_path = $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path}; + my $backing_disk = $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lv_path}; + my $device_minor = $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_minor}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:device_path" => $device_path, + "s2:backing_disk" => $backing_disk, + "s3:device_minor" => $device_minor, + }}); + + $volumes .= $anvil->Words->string({key => "file_0004", variables => { + volume => $volume, + drbd_path => $device_path, + minor => $device_minor, + lv_path => $backing_disk, + }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volumes => $volumes }}); + } + $hosts .= $anvil->Words->string({key => "file_0003", variables => { + short_host_name => $dr1_short_host_name, + node_id => "2", + volumes => $volumes, + }}); + } + } + + ### The connections. 
+ $anvil->Database->get_ip_addresses({debug => 2}); + my $node1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{network}{sn1}{ip_address}; + my $node2_sn1_ip = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{network}{sn1}{ip_address}; + my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address}; + my $dr_protocol = "A"; + if ($anvil->data->{switches}{protocol} eq "sync") + { + $dr_protocol = "C"; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + node1_sn1_ip => $node1_sn1_ip, + node2_sn1_ip => $node2_sn1_ip, + dr1_sn1_ip => $dr1_sn1_ip, + dr_protocol => $dr_protocol, + }}); + + # Node 1 to Node 2 first, then n + my $connections = $anvil->Words->string({key => "file_0005", variables => { + host1_short_name => $node1_short_host_name, + host1_sn_ip => $node1_sn1_ip, + host2_short_name => $node2_short_host_name, + host2_sn_ip => $node2_sn1_ip, + tcp_port => $nodes_tcp_port, + 'c-rate-maximum' => 500, + protocol => "C", + fencing => "resource-and-stonith" + }}); + + # Node 1 to DR + $connections .= $anvil->Words->string({key => "file_0005", variables => { + host1_short_name => $node1_short_host_name, + host1_sn_ip => $node1_sn1_ip, + host2_short_name => $dr1_short_host_name, + host2_sn_ip => $dr1_sn1_ip, + tcp_port => $node1_to_dr_port, + 'c-rate-maximum' => 500, + protocol => $dr_protocol, + fencing => "dont-care" + }}); + + # Node 2 to DR + $connections .= $anvil->Words->string({key => "file_0005", variables => { + host1_short_name => $node2_short_host_name, + host1_sn_ip => $node2_sn1_ip, + host2_short_name => $dr1_short_host_name, + host2_sn_ip => $dr1_sn1_ip, + tcp_port => $node2_to_dr_port, + 'c-rate-maximum' => 500, + protocol => $dr_protocol, + fencing => "dont-care" + }}); + + my $new_resource_config = $anvil->Words->string({key => "file_0006", variables => { + server => $server_name, + hosts => $hosts, + connections => $connections, + }}); + 
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_resource_config => $new_resource_config }}); + + # Is the new res file the same as the old one? + my $difference = diff \$old_resource_config, \$new_resource_config, { STYLE => 'Unified' }; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { difference => $difference }}); + + if (not $difference) + { + # The resource file doesn't need to be updated. + my $variables = { + file => $config_file, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0364", variables => $variables}); + $anvil->Job->update_progress({ + progress => 50, + message => "job_0364", + variables => $variables, + }); + } + else + { + # Write out a test file. + my $test_file = $anvil->data->{path}{directories}{temp}."/test-".$server_name.".res"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); + my ($problem) = $anvil->Storage->write_file({ + debug => 2, + backup => 0, + overwrite => 1, + file => $test_file, + body => $new_resource_config, + user => "root", + group => "root", + mode => "0644", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + + # Validate. + my $shell_call = $anvil->data->{path}{exe}{drbdadm}." --config-to-test ".$test_file." --config-to-exclude ".$config_file." sh-nop"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code) + { + # Something went wrong. 
+ my $variables = { + return_code => $return_code, + output => $output, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0345", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0345", + variables => $variables, + }); + $anvil->nice_exit({exit_code => 1}); + } + + # Remove the test file. + unlink $test_file; + + # Backup the res file so we can tell the user where the current config was backed up to in + # case they need to restore it. + my ($backup_file) = $anvil->Storage->backup({file => $config_file}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_file => $backup_file }}); + my $variables = { backup_file => $backup_file }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0365", variables => $variables}); + $anvil->Job->update_progress({ + progress => 60, + message => "job_0365", + variables => $variables, + }); + + # Write out the new file. + ($problem) = $anvil->Storage->write_file({ + debug => 2, + backup => 0, + overwrite => 1, + file => $config_file, + body => $new_resource_config, + user => "root", + group => "root", + mode => "0644", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_file => $backup_file }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0366"}); + $anvil->Job->update_progress({ + progress => 65, + message => "job_0366", + }); + + # Call 'drbdadm dump-xml' to check that it's OK. + ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." dump-xml"}); + if ($return_code) + { + # Something went wrong. 
+ my $variables = { + return_code => $return_code, + output => $output, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0345", variables => $variables}); + $anvil->Job->update_progress({ + progress => 70, + message => "error_0345", + variables => $variables, + }); + + # Restoring the old config now. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0346"}); + $anvil->Job->update_progress({ + progress => 75, + message => "error_0346", + }); + + # Backup the bad file and worn the user. + my ($backup_file) = $anvil->Storage->backup({file => $config_file}); + $variables = { file => $backup_file }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0347", variables => $variables}); + $anvil->Job->update_progress({ + progress => 80, + message => "error_0347", + variables => $variables, + }); + + # Write out the new file. + my ($problem) = $anvil->Storage->write_file({ + debug => 2, + backup => 1, + overwrite => 1, + file => $config_file, + body => $old_resource_config, + user => "root", + group => "root", + mode => "0644", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_file => $backup_file }}); + + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0348"}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0348", + variables => $variables, + }); + $anvil->nice_exit({exit_code => 1}); + } + + # New config is good! Update the file on the peers. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0367"}); + $anvil->Job->update_progress({ + progress => 70, + message => "job_0367", + }); + } + + # New config is good! Update the file on the peers. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0368"}); + $anvil->Job->update_progress({ + progress => 72, + message => "job_0368", + }); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + # "Peer" in this context is either a node or a DR host + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; + my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $variables = { + file => $config_file, + host_name => $peer_host_name, + ip_address => $peer_sn_ip, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0369", variables => $variables}); + $anvil->Job->update_progress({ + progress => 75, + message => "job_0369", + variables => $variables, + }); + my ($problem) = $anvil->Storage->write_file({ + debug => 2, + backup => 1, + overwrite => 1, + file => $config_file, + body => $new_resource_config, + user => "root", + group => "root", + mode => "0644", + target => $peer_sn_ip, + password => $anvil_password, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + } + + # Immediately call scan-drbd on all machines to ensure that if another run is about to happen for a + # different server, it knows the used ports list is updated. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0381"}); + $anvil->Job->update_progress({ + progress => 76, + message => "job_0381", + }); + + my $scan_drbd_call = $anvil->data->{path}{directories}{scan_agents}."/scan-drbd/scan-drbd --force ".$anvil->Log->switches(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_drbd_call => $scan_drbd_call }}); + my ($output, $return_code) = $anvil->System->call({shell_call => $scan_drbd_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + # "Peer" in this context is either a node or a DR host + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; + my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $variables = { host_name => $peer_host_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0382", variables => $variables}); + $anvil->Job->update_progress({ + progress => 77, + message => "job_0382", + variables => $variables, + }); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_sn_ip, + password => $anvil_password, + shell_call => $scan_drbd_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + + # Create the LV and MD on DR. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0370"}); + $anvil->Job->update_progress({ + progress => 78, + message => "job_0370", + }); + my $create_md = 0; + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$server_name}}) + { + my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address}; + my $lv_path = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lv_path}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + volume => $volume, + dr1_sn1_ip => $dr1_sn1_ip, + lv_path => $lv_path, + }}); + + my $variables = { + volume => $volume, + lv_path => $lv_path, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0371", variables => $variables}); + $anvil->Job->update_progress({ + progress => 80, + message => "job_0371", + variables => $variables, + }); + + my $lv_check_call = " +if [ -e '".$lv_path."' ]; +then + echo exists; +else + echo create; +fi"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lv_check_call => $lv_check_call }}); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $dr1_sn1_ip, + password => $anvil_password, + shell_call => $lv_check_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + if ($output eq "exists") + { + my $variables = { lv_path => $lv_path }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0372", variables => $variables}); + $anvil->Job->update_progress({ + progress => 80, + message => "job_0372", + variables => $variables, + }); + next; + } + + # Create the LV. 
+ my $lvcreate_call = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lvcreate_call}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lvcreate_call => $lvcreate_call }}); + ($output, $error, $return_code) = $anvil->Remote->call({ + target => $dr1_sn1_ip, + password => $anvil_password, + shell_call => $lvcreate_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + + sleep 1; + # Does it exist now? + ($output, $error, $return_code) = $anvil->Remote->call({ + target => $dr1_sn1_ip, + password => $anvil_password, + shell_call => $lv_check_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + if ($output eq "create") + { + my $variables = { lv_path => $lv_path }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0349", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0349", + variables => $variables, + }); + $anvil->nice_exit({exit_code => 1}); + } + + # Create the DRBD meta data now. + $create_md = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { create_md => $create_md }}); + } + + if ($create_md) + { + my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address}; + my $drbd_md_call = $anvil->data->{path}{exe}{drbdadm}." 
--force create-md --max-peers=3 ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + dr1_sn1_ip => $dr1_sn1_ip, + drbd_md_call => $drbd_md_call, + }}); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $dr1_sn1_ip, + password => $anvil_password, + shell_call => $drbd_md_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + + # Reload the config. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0373"}); + $anvil->Job->update_progress({ + progress => 85, + message => "job_0373", + }); + my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + # "Peer" in this context is either a node or a DR host + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; + my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $variables = { + server => $server_name, + host_name => $peer_host_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0374", variables => $variables}); + $anvil->Job->update_progress({ + progress => 85, + message => "job_0374", + variables => $variables, + }); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_sn_ip, + password => $anvil_password, 
+ shell_call => $shell_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + + # If the resource is down, bring it up. + $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0375", variables => $variables}); + $anvil->Job->update_progress({ + progress => 88, + message => "job_0375", + variables => $variables, + }); + my $drbd_up_call = $anvil->data->{path}{exe}{drbdsetup}." status ".$server_name." || ".$anvil->data->{path}{exe}{drbdadm}." up ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_up_call => $drbd_up_call }}); + ($output, $return_code) = $anvil->System->call({shell_call => $drbd_up_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + # "Peer" in this context is either a node or a DR host + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; + my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + $variables = { host_name => $peer_host_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0376", variables => $variables}); + $anvil->Job->update_progress({ + progress => 90, + message => "job_0376", + variables => $variables, + }); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_sn_ip, + password => $anvil_password, + shell_call => $drbd_up_call, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + 
return_code => $return_code, + }}); + } + + # Now watch until the DR host shows up + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0377"}); + $anvil->Job->update_progress({ + progress => 92, + message => "job_0377", + }); + my $waiting = 1; + while($waiting) + { + sleep 5; + $anvil->DRBD->gather_data({debug => 2}); + + my $dr_seen = 1; + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{volume}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volume => $volume }}); + if (exists $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}) + { + my $local_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{local_role}; + my $local_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{local_disk_state}; + my $peer_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{peer_role}; + my $peer_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{peer_disk_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_role => $local_role, + local_disk_state => $local_disk_state, + peer_role => $peer_role, + peer_disk_state => $peer_disk_state, + }}); + } + else + { + # Not up yet. 
+ my $next_check = $anvil->Get->date_and_time({offset => 5, time_only => 1}); + my $variables = { next_check => $next_check }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0378", variables => $variables}); + $anvil->Job->update_progress({ + progress => 95, + message => "job_0378", + variables => $variables, + }); + $dr_seen = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dr_seen => $dr_seen }}); + last; + } + } + + if ($dr_seen) + { + # We're ready. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0379"}); + $anvil->Job->update_progress({ + progress => 98, + message => "job_0379", + }); + $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + } + } + + # Done! + $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0380", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "job_0380", + variables => $variables, + }); + + return(0); +} +
+# Loads the job's details and converts its job data into entries under 'switches::'.
+#
+# This calls Job->clear() and Job->get_job_details(), then reports progress '1' along with this process's
+# PID and the current time. The job is presumably the one identified by 'switches::job-uuid' (that is the
+# value printed below); confirm against Job->get_job_details().
+#
+# The 'Yes' switch is always set to '1'. After that, every line of 'jobs::job_data' containing an '=' is
+# stored as 'switches::<key>' = '<value>'. Lines without an '=' are logged and otherwise ignored.
+#
+# Parameters;
+#  anvil - The Anvil::Tools handle.
+#
+# Returns '0'.
+sub load_job
+{
+	my ($anvil) = @_;
+
+	$anvil->Job->clear();
+	$anvil->Job->get_job_details();
+	$anvil->Job->update_progress({
+		message          => "message_0267",
+		progress         => 1,
+		job_picked_up_at => time(),
+		job_picked_up_by => $$,
+	});
+
+	print "Loading the job: [".$anvil->data->{switches}{'job-uuid'}."]...\n";
+
+	$anvil->Log->variables({
+		source => $THIS_FILE,
+		line   => __LINE__,
+		level  => 2,
+		list   => {
+			"jobs::job_command"  => $anvil->data->{jobs}{job_command},
+			"jobs::job_data"     => $anvil->data->{jobs}{job_data},
+			"jobs::job_progress" => $anvil->data->{jobs}{job_progress},
+			"jobs::job_status"   => $anvil->data->{jobs}{job_status},
+		},
+	});
+
+	# Jobs run unattended, so the confirmation switch is always set before the job data is parsed.
+	$anvil->data->{switches}{Yes} = 1;
+	print "- Set the switch: [--Yes] to true.\n";
+
+	# Each 'key=value' line in the job data becomes a switch.
+	my @job_lines = split(/\n/, $anvil->data->{jobs}{job_data});
+	foreach my $entry (@job_lines)
+	{
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $entry }});
+
+		# The key match is non-greedy, so the line is split at the first '=' and the value may itself
+		# contain '='. When there is no '=' at all, the match returns an empty list and we move on.
+		my ($switch, $setting) = ($entry =~ /(.*?)=(.*)$/);
+		next if not defined $switch;
+
+		$anvil->Log->variables({
+			source => $THIS_FILE,
+			line   => __LINE__,
+			level  => 2,
+			list   => {
+				key   => $switch,
+				value => $setting,
+			},
+		});
+
+		$anvil->data->{switches}{$switch} = $setting;
+		$anvil->Log->variables({
+			source => $THIS_FILE,
+			line   => __LINE__,
+			level  => 2,
+			list   => { "switches::".$switch => $anvil->data->{switches}{$switch} },
+		});
+
+		print "* Set the switch: [--".$switch."] to: [".$setting."]\n";
+	}
+	print "Job loaded successfully.\n\n";
+
+	return(0);
+}
diff --git a/tools/anvil-manage-firewall b/tools/anvil-manage-firewall index 27361ec2..64ba4075 100755 --- a/tools/anvil-manage-firewall +++ b/tools/anvil-manage-firewall @@ -48,7 +48,7 @@ if (not $anvil->data->{sys}{manage}{firewall}) # Do nothing.
$anvil->nice_exit({exit_code => 0}); } -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); # Read switches $anvil->data->{switches}{'y'} = ""; diff --git a/tools/anvil-parse-fence-agents b/tools/anvil-parse-fence-agents index 6c1a1a73..c4ee440b 100755 --- a/tools/anvil-parse-fence-agents +++ b/tools/anvil-parse-fence-agents @@ -27,7 +27,7 @@ my $anvil = Anvil::Tools->new(); $anvil->data->{switches}{refresh} = 0; $anvil->Get->switches; -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "switches::refresh" => $anvil->data->{switches}{refresh}, }}); diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index a999fe64..0ddee354 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -161,8 +161,8 @@ sub run_jobs # Sanity checks passed $anvil->Job->update_progress({ - progress => 10, - message => "job_0185", + progress => 10, + message => "job_0185", }); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0185"}); diff --git a/tools/striker-manage-install-target b/tools/striker-manage-install-target index c166b04b..6fa6f859 100755 --- a/tools/striker-manage-install-target +++ b/tools/striker-manage-install-target @@ -76,7 +76,7 @@ $anvil->data->{switches}{'no-refresh'} = 0; $anvil->data->{switches}{refresh} = 0; $anvil->data->{switches}{status} = ""; $anvil->Get->switches(); 
-$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'switches::check' => $anvil->data->{switches}{check}, 'switches::disable' => $anvil->data->{switches}{disable}, @@ -176,7 +176,7 @@ if ($anvil->data->{switches}{disable}) # Exit if we're not configured yet my $configured = $anvil->System->check_if_configured; -$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { configured => $configured }}); if (not $configured) { print $anvil->Words->string({key => "error_0046"})."\n"; @@ -584,7 +584,7 @@ sub setup_boot_environment } ### PXE UEFI 'grub.cfg' file. 
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "host_os::os_type" => $anvil->data->{host_os}{os_type} }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "host_os::os_type" => $anvil->data->{host_os}{os_type} }}); my $say_os = "#!string!brand_0010!#"; if ($anvil->data->{host_os}{os_type} eq "centos8") { @@ -594,7 +594,7 @@ sub setup_boot_environment { $say_os = "#!string!brand_0012!#"; } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_os => $say_os }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_os => $say_os }}); my $uefi_grub_body = $anvil->Template->get({file => "pxe.txt", show_name => 0, name => "tftp_grub", variables => { base_url => $base_url, @@ -2735,7 +2735,7 @@ sub load_packages my ($os_type, $os_arch) = $anvil->Get->os_type(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { os_type => $os_type, os_arch => $os_arch, }}); @@ -2772,7 +2772,7 @@ sub load_packages my $rhel8_test_source = $anvil->data->{path}{directories}{html}."/rhel8"; my $centos8_test_source = $anvil->data->{path}{directories}{html}."/centos8"; my $centos_stream8_test_source = $anvil->data->{path}{directories}{html}."/centos-stream8"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { rhel8_test_source => $rhel8_test_source, centos8_test_source => $centos8_test_source, centos_stream8_test_source => $centos_stream8_test_source, diff --git a/tools/striker-prep-database b/tools/striker-prep-database index c0353a05..e5b62cc0 100755 --- a/tools/striker-prep-database +++ b/tools/striker-prep-database @@ -199,9 +199,9 @@ if ($local_uuid) # Started the daemon. 
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0059"}); - # Make sure it is enabled on boot. - my $return_code = $anvil->System->enable_daemon({debug => 2, daemon => $anvil->data->{sys}{daemon}{postgresql}}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }}); + ### NOTE: We no longer enable postgres on boot. When the first call is made to + ### Database->connect on a striker, and no databases are available, it will + ### start up the local daemon then. } else {