diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index 4cd64e43..8fab1091 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1095,7 +1095,6 @@ sub _set_paths bonds => "/proc/net/bonding", 'cgi-bin' => "/var/www/cgi-bin", drbd_resources => "/etc/drbd.d/", - drbd_kernel_proc => "/sys/kernel/debug/drbd/resources", fence_agents => "/usr/sbin", firewalld_services => "/usr/lib/firewalld/services", firewalld_zones_etc => "/etc/firewalld/zones", # Changes when firewall-cmd ... --permanent is used. @@ -1219,6 +1218,7 @@ sub _set_paths lvchange => "/usr/sbin/lvchange", lvcreate => "/usr/sbin/lvcreate", lvdisplay => "/usr/sbin/lvdisplay", + lvextend => "/usr/sbin/lvextend", lvremove => "/usr/sbin/lvremove", lvrename => "/usr/sbin/lvrename", lvs => "/usr/sbin/lvs", diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 068140c2..f6c0ebe6 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -5061,9 +5061,9 @@ FROM $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_uuid} = $scan_lvm_vg_uuid; $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_internal_uuid} = $scan_lvm_vg_internal_uuid; $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_attributes} = $scan_lvm_vg_attributes; - $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_extent_size} = $scan_lvm_vg_extent_size." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $scan_lvm_vg_extent_size}).")"; - $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_size} = $scan_lvm_vg_size." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $scan_lvm_vg_size}).")"; - $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_free} = $scan_lvm_vg_free." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $scan_lvm_vg_free}).")"; + $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_extent_size} = $scan_lvm_vg_extent_size; + $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_size} = $scan_lvm_vg_size; + $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_free} = $scan_lvm_vg_free; $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{storage_group_uuid} = $storage_group_uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "lvm::host_name::${short_host_name}::vg::${scan_lvm_vg_name}::scan_lvm_vg_uuid" => $anvil->data->{lvm}{host_name}{$short_host_name}{vg}{$scan_lvm_vg_name}{scan_lvm_vg_uuid}, @@ -5962,6 +5962,7 @@ ORDER BY $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_size} = 0; $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free} = 0; $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{storage_group_member_note} = $storage_group_member_note; + $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space} = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::group_name" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{group_name}, 
"storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::short_host_name::${storage_group_member_host_name}::host_uuid" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{short_host_name}{$storage_group_member_host_name}{host_uuid}, @@ -5969,6 +5970,7 @@ ORDER BY "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::host_uuid::${storage_group_member_host_uuid}::vg_size" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_size}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_size}}).")", "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::host_uuid::${storage_group_member_host_uuid}::vg_free" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free}." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free}}).")", "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::host_uuid::${storage_group_member_host_uuid}::storage_group_member_note" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{storage_group_member_note}, + "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::free_space" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space}, }}); # Make it easier to use the VG UUID to find the storage_group_uuid. @@ -6015,6 +6017,15 @@ WHERE "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::host_uuid::${storage_group_member_host_uuid}::vg_size" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_size}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_size}}).")", "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::host_uuid::${storage_group_member_host_uuid}::vg_free" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free}." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free}}).")", }}); + + if (($anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space} == 0) or + ($anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free} < $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space})) + { + $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space} = $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$storage_group_member_host_uuid}{vg_free}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "storage_groups::anvil_uuid::${storage_group_anvil_uuid}::storage_group_uuid::${storage_group_uuid}::free_space" => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{storage_groups}{anvil_uuid}{$storage_group_anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{free_space}}).")", + }}); + } } } @@ -10253,6 +10264,8 @@ AND job_data = ".$anvil->Database->quote($job_data)." AND job_host_uuid = ".$anvil->Database->quote($job_host_uuid)." 
+AND + job_progress != 100 ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); diff --git a/Anvil/Tools/Email.pm b/Anvil/Tools/Email.pm index 4ec97fcd..e0d9add2 100644 --- a/Anvil/Tools/Email.pm +++ b/Anvil/Tools/Email.pm @@ -303,11 +303,8 @@ sub get_current_server my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Email->get_current_server()" }}); - if (not exists $anvil->data->{mail_servers}{mail_server}) - { - # Try loading the mail server data. - $anvil->Database->get_mail_servers({debug => $debug}); - } + # Try loading the mail server data. + $anvil->Database->get_mail_servers({debug => $debug}); my $newest_mail_server_time = 0; my $newest_mail_server_uuid = ""; diff --git a/Anvil/Tools/Get.pm b/Anvil/Tools/Get.pm index 348eaf13..d0101457 100644 --- a/Anvil/Tools/Get.pm +++ b/Anvil/Tools/Get.pm @@ -801,6 +801,7 @@ ORDER BY } } + # Get storage group data now. 
foreach my $storage_group_uuid (keys %{$anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}}) { $anvil->data->{anvil_resources}{$anvil_uuid}{storage_group}{$storage_group_uuid}{group_name} = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{group_name}; diff --git a/man/Makefile.am b/man/Makefile.am index db18f70f..0150315a 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -21,6 +21,7 @@ dist_man8_MANS = \ anvil-manage-server.8 \ anvil-manage-server-storage.8 \ anvil-manage-storage-groups.8 \ + anvil-watch-drbd.8 \ scancore.8 \ striker-check-machines.8 \ striker-initialize-host.8 diff --git a/man/anvil-watch-drbd.8 b/man/anvil-watch-drbd.8 new file mode 100644 index 00000000..959ba3fc --- /dev/null +++ b/man/anvil-watch-drbd.8 @@ -0,0 +1,30 @@ +.\" Displays the status of DRBD replication in a concise manner +.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. +.TH anvil-watch-drbd "8" "June 22 2023" "Anvil! Intelligent Availability™ Platform" +.SH NAME +anvil-watch-drbd \- This program displays the status of DRBD connections in a concise format. +.SH SYNOPSIS +.B anvil-watch-drbd +\fI\, \/\fR[\fI\,options\/\fR] +.SH DESCRIPTION +This program looks at the various 'proc/drbd' files related to DRBD 9 connections and displays them in a concise manner. It can optionally provide a continuously updating view of the DRBD status, useful for monitoring resyncs. +.TP +.SH OPTIONS +.TP +\-?, \-h, \fB\-\-help\fR +Show this man page. +.TP +\fB\-\-log\-secure\fR +When logging, record sensitive data, like passwords. +.TP +\-v, \-vv, \-vvv +Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. +.SS "Commands:" +.TP +\fB\-\-watch\fR +When used alone, it tells the program to stay running, updating the view of the DRBD resources every two (2) seconds. Optionally, an integer can be passed that sets how frequently the display is updated.
+.IP +.SH AUTHOR +Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. +.SH "REPORTING BUGS" +Report bugs to users@clusterlabs.org diff --git a/scancore-agents/scan-server/scan-server b/scancore-agents/scan-server/scan-server index 090bbd33..4366513c 100755 --- a/scancore-agents/scan-server/scan-server +++ b/scancore-agents/scan-server/scan-server @@ -339,6 +339,34 @@ sub collect_data { my $server_definition = $anvil->data->{server_definitions}{server_definition_server_uuid}{$server_uuid}{server_definition_xml}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_definition => $server_definition }}); + + # Make very sure the definition XML is valid + if (not $server_definition) + { + # Asked to write an empty definition file! + my $variables = { + server => $server_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_server_alert_0020", variables => $variables }); + $anvil->Alert->register({alert_level => "notice", message => "scan_server_alert_0020", variables => $variables, set_by => $THIS_FILE}); + $anvil->nice_exit({exit_code => 1}); + } + + local $@; + my $xml = XML::Simple->new(); + my $server_xml = ""; + my $test = eval { $server_xml = $xml->XMLin($server_definition, KeyAttr => {}, ForceArray => 1) }; + if (not $test) + { + chomp $@; + my $error = "[ Error ] - The was a problem parsing: [".$server_definition."]. The error was:\n"; + $error .= "===========================================================\n"; + $error .= $@."\n"; + $error .= "===========================================================\n"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }}); + return(1); + } + if ($server_definition) { # Register an alert. @@ -402,6 +430,7 @@ DELETED - Marks a server as no longer existing # Parse out the server UUID. 
my $virsh_definition = get_and_parse_virsh_definition($anvil, $server_name); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { virsh_definition => $virsh_definition }}); # Does the XML definition file exist yet? my $xml_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml"; @@ -410,13 +439,13 @@ DELETED - Marks a server as no longer existing { # No, generate it. This will also load and parse the file after it's written. my $on_disk_definition = update_on_disk_definition($anvil, $server_name, $virsh_definition); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { on_disk_definition => $on_disk_definition }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { on_disk_definition => $on_disk_definition }}); } else { # Yes, parse it. my $on_disk_definition = get_and_parse_disk_definition($anvil, $server_name); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { on_disk_definition => $on_disk_definition }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { on_disk_definition => $on_disk_definition }}); } # We'll compare the memory allocated to the server from the on-disk definition and the memory @@ -540,19 +569,30 @@ DELETED - Marks a server as no longer existing return_code => $return_code, }}); - # The definition may have certainly changed, so update it in case - # needed. - update_definitions_from_virsh($anvil, $server_name, $server_uuid, $virsh_definition); - - # Now undefine the server - $shell_call = $anvil->data->{path}{exe}{setsid}." --wait ".$anvil->data->{path}{exe}{virsh}." undefine ".$server_name; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + # Make sure the definition we read was valid. 
+ my $problem = $anvil->Server->parse_definition({ + server => $server_name, + source => "from_virsh", + definition => $virsh_definition, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - (my $output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - output => $output, - return_code => $return_code, - }}); + if (not $problem) + { + # The definition may have certainly changed, so update it in case + # needed. + update_definitions_from_virsh($anvil, $server_name, $server_uuid, $virsh_definition); + + # Now undefine the server + $shell_call = $anvil->data->{path}{exe}{setsid}." --wait ".$anvil->data->{path}{exe}{virsh}." undefine ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + + (my $output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + } } # Set the boot time back to zero. 
@@ -588,7 +628,7 @@ DELETED - Marks a server as no longer existing my $virsh_definition = get_and_parse_virsh_definition($anvil, $server_name); my $database_definition = get_and_parse_database_definition($anvil, $server_name, $server_uuid); my $on_disk_definition = get_and_parse_disk_definition($anvil, $server_name); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { virsh_definition => $virsh_definition, database_definition => $database_definition, on_disk_definition => $on_disk_definition, @@ -693,11 +733,18 @@ DELETED - Marks a server as no longer existing my ($virsh_live_definition, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $virsh_definition, return_code => $return_code }}); - $anvil->Server->parse_definition({ + my $problem = $anvil->Server->parse_definition({ server => $server_name, source => "from_live_virsh", definition => $virsh_live_definition, }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + + if ($problem) + { + $virsh_live_definition = ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { virsh_live_definition => $virsh_live_definition }}); + } # Now that definition updates are dealth with, has anything else changed? my $server_state = $anvil->data->{'scan-server'}{server_name}{$server_name}{server_state}; @@ -937,7 +984,7 @@ DELETED - Marks a server as no longer existing # Still here? check the age of the file on disk and the age of the database entry. 
my $database_definition = get_and_parse_database_definition($anvil, $server_name, $server_uuid); my $on_disk_definition = get_and_parse_disk_definition($anvil, $server_name); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { database_definition => $database_definition, on_disk_definition => $on_disk_definition, }}); @@ -1114,11 +1161,18 @@ sub get_and_parse_database_definition my $database_definition = $anvil->data->{server_definitions}{server_definition_server_uuid}{$server_uuid}{server_definition_xml}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { database_definition => $database_definition }}); - $anvil->Server->parse_definition({ + my $problem = $anvil->Server->parse_definition({ server => $server_name, source => "from_db", definition => $database_definition, }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); + if ($problem) + { + # The definition is not valid. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_server_log_0003", variables => { definition => $database_definition }}); + $database_definition = ""; + } return($database_definition); } @@ -1136,11 +1190,19 @@ sub get_and_parse_disk_definition $anvil->Storage->get_file_stats({file_path => $xml_file}); - $anvil->Server->parse_definition({ + my $problem = $anvil->Server->parse_definition({ + debug => 2, server => $server_name, source => "from_disk", definition => $on_disk_definition, }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); + if ($problem) + { + # The definition is not valid. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_server_log_0003", variables => { definition => $on_disk_definition }}); + $on_disk_definition = ""; + } return($on_disk_definition); } @@ -1160,11 +1222,19 @@ sub get_and_parse_virsh_definition return_code => $return_code, }}); - $anvil->Server->parse_definition({ + my $problem = $anvil->Server->parse_definition({ + debug => 2, server => $server_name, source => "from_virsh", definition => $virsh_definition, }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); + if ($problem) + { + # The definition is not valid. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_server_log_0003", variables => { definition => $virsh_definition }}); + $virsh_definition = ""; + } return($virsh_definition); } @@ -1201,6 +1271,7 @@ sub redefine_server_from_disk # Re-read and parse the new (inactive) definition my $virsh_definition = get_and_parse_virsh_definition($anvil, $server_name); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { virsh_definition => $virsh_definition }}); return($virsh_definition); } @@ -1212,6 +1283,7 @@ sub update_definitions_from_virsh my ($anvil, $server_name, $server_uuid, $virsh_definition) = @_; my $problem = $anvil->Server->parse_definition({ + debug => 2, server => $server_name, source => "from_virsh", definition => $virsh_definition, @@ -1226,21 +1298,22 @@ sub update_definitions_from_virsh server_definition_xml => $virsh_definition, server_definition_server_uuid => $server_uuid, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { server_definition_uuid => $server_definition_uuid }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_definition_uuid => $server_definition_uuid }}); # Reload the database definitions 
$anvil->Database->get_server_definitions(); my $xml_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { xml_file => $xml_file }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { xml_file => $xml_file }}); # Just write out the file. If nothing else, it'll update the mtime. my $return = $anvil->Storage->write_file({ + debug => 2, body => $virsh_definition, file => $xml_file, overwrite => 1, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'return' => $return }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'return' => $return }}); } return(0); @@ -1274,6 +1347,33 @@ sub update_on_disk_definition { my ($anvil, $server_name, $new_definition) = @_; + # Make very sure the definition XML is valid + if (not $new_definition) + { + # Asked to write an empty definition file! + my $variables = { + server => $server_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_server_alert_0020", variables => $variables }); + $anvil->Alert->register({alert_level => "notice", message => "scan_server_alert_0020", variables => $variables, set_by => $THIS_FILE}); + $anvil->nice_exit({exit_code => 1}); + } + + local $@; + my $xml = XML::Simple->new(); + my $server_xml = ""; + my $test = eval { $server_xml = $xml->XMLin($new_definition, KeyAttr => {}, ForceArray => 1) }; + if (not $test) + { + chomp $@; + my $error = "[ Error ] - The was a problem parsing: [".$new_definition."]. 
The error was:\n"; + $error .= "===========================================================\n"; + $error .= $@."\n"; + $error .= "===========================================================\n"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", list => { error => $error }}); + return(1); + } + my $xml_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { xml_file => $xml_file }}); @@ -1290,6 +1390,7 @@ sub update_on_disk_definition # Read the file back in and parse it. my $on_disk_definition = get_and_parse_disk_definition($anvil, $server_name); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { on_disk_definition => $on_disk_definition }}); return($on_disk_definition); } diff --git a/scancore-agents/scan-server/scan-server.xml b/scancore-agents/scan-server/scan-server.xml index cd11c05f..1e2f5c6f 100644 --- a/scancore-agents/scan-server/scan-server.xml +++ b/scancore-agents/scan-server/scan-server.xml @@ -113,10 +113,17 @@ The definition for the server: [#!variable!server!#] was changed in the database There was no DRBD fence rule for the: [#!variable!server!#] in the pacemaker configuration. Adding it now. + I was asked to write an empty definition for the server: [#!variable!server!#], this must be a program error. Exiting. Starting: [#!variable!program!#]. This host is a: [#!variable!host_type!#], this agent is only useful on nodes and DR hosts. Exiting. + +The following definition file failed to parse and is likely corrupt. +==== +#!variable!definition!# +==== + The server: [#!variable!server!#] is migrating, skipping scanning it. 
diff --git a/share/words.xml b/share/words.xml index 3a4baadf..859feff1 100644 --- a/share/words.xml +++ b/share/words.xml @@ -600,7 +600,7 @@ The error was: #!variable!error!# ======== - There was a problem with finding a common storage network between: [#!variable!node1_name!#] and: [#!variable!node2_name!#] using the common interface: [#!variable!interface!#]. Found node 1 to have the IP: [#!variable!node1_ip!#] and node 2: [#!variable!node2_ip!#]. Is there a problem with '/etc/hosts'? + There was a problem with finding a common storage network between: [#!variable!node1_name!#] and: [#!variable!node2_name!#]. Found node 1 to have the IP: [#!variable!node1_ip!#] and node 2: [#!variable!node2_ip!#]. Is there a problem with '/etc/hosts'? Failed to find a network to use for storage replication. Is there a problem with '/etc/hosts'? diff --git a/tools/Makefile.am b/tools/Makefile.am index d54aecb2..cd2b412f 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,8 +1,6 @@ MAINTAINERCLEANFILES = Makefile.in # XXX recheck after rebase! -EXTRA_DIST = \ - watch_drbd dist_sbin_SCRIPTS = \ anvil-access-module \ @@ -47,6 +45,7 @@ dist_sbin_SCRIPTS = \ anvil-update-system \ anvil-version-changes \ anvil-watch-bonds \ + anvil-watch-drbd \ anvil-watch-power \ scancore \ striker-auto-initialize-all \ diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 2116e9a7..faf573d4 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -252,7 +252,7 @@ sub check_ram }}); if ($problem) { - # See if an [anvil-sync-shared' job is running and, if so, don't exit. The file copy is + # See if an 'anvil-sync-shared' job is running and, if so, don't exit. The file copy is # counted and not an actual problem. 
$anvil->Database->get_jobs({debug => 2}); foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}}) diff --git a/tools/anvil-manage-server-storage b/tools/anvil-manage-server-storage index 2f956b28..11505c99 100755 --- a/tools/anvil-manage-server-storage +++ b/tools/anvil-manage-server-storage @@ -47,6 +47,7 @@ $anvil->Log->secure({set => 1}); $anvil->Get->switches({list => [ "add", "anvil", + "confirm", "disk", "eject", "grow", @@ -165,10 +166,12 @@ sub manage_disk { my ($anvil) = @_; + my $anvil_uuid = defined $anvil->data->{switches}{anvil_uuid} ? $anvil->data->{switches}{anvil_uuid} : $anvil->Cluster->get_anvil_uuid(); my $short_host_name = $anvil->Get->short_host_name; my $server_name = $anvil->data->{switches}{server_name}; my $from_source = get_definition_source($anvil); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + anvil_uuid => $anvil_uuid, short_host_name => $short_host_name, server_name => $server_name, from_source => $from_source, @@ -199,7 +202,7 @@ sub manage_disk } } - my $drbd_resource = load_storage($anvil); + my $drbd_resource = load_storage($anvil); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_resource => $drbd_resource }}); foreach my $host_type ("node", "dr") { @@ -257,12 +260,33 @@ sub manage_disk } } } + + # How much space can this LV grow into (this factors DR is already protected). + $anvil->data->{server_name}{$server_name}{drbd_resource}{$drbd_resource}{volume}{$volume_number}{free_space} = get_max_free_space($anvil, $drbd_resource, $volume_number); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "server_name::${server_name}::drbd_resource::${drbd_resource}::volume::${volume_number}::free_space" => $anvil->data->{server_name}{$server_name}{drbd_resource}{$drbd_resource}{volume}{$volume_number}{free_space}." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{server_name}{$server_name}{drbd_resource}{$drbd_resource}{volume}{$volume_number}{free_space}}).")", + }}); } } } my $device_target = $anvil->data->{switches}{disk}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { device_target => $device_target }}); + + if ($anvil->data->{switches}{disk} eq "#!SET!#") + { + # User didn't specify a device. + show_server_details($anvil); + print "\n[ Error ] - Please specify the disk drive target you want to work on.\n"; + $anvil->nice_exit({exit_code => 1}); + } + elsif (not exists $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{disk}{target}{$device_target}) + { + # Invalid device target + show_server_details($anvil); + print "\n[ Error ] - The disk drive target: [".$device_target."] wasn't found.\n"; + $anvil->nice_exit({exit_code => 1}); + } my $device = "disk"; my $alias = $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{$device}{target}{$device_target}{alias}; @@ -280,6 +304,9 @@ sub manage_disk my $device_path = $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{$device}{target}{$device_target}{path}; my $driver_io = $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{$device}{target}{$device_target}{driver}{io}; my $driver_cache = $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{$device}{target}{$device_target}{driver}{cache}; + my $on_lv = $anvil->data->{server}{$short_host_name}{$server_name}{device}{$device_path}{on_lv}; + my $drbd_volume = $anvil->data->{lvm}{host_name}{$short_host_name}{lv_path}{$on_lv}{drbd}{volume}; + my $max_free_space = $anvil->data->{server_name}{$server_name}{drbd_resource}{$drbd_resource}{volume}{$drbd_volume}{free_space}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 's01:device_target' => $device_target, 
's02:alias' => $alias, @@ -297,9 +324,14 @@ sub manage_disk 's14:device_path' => $device_path, 's15:driver_io' => $driver_io, 's16:driver_cache' => $driver_cache, + 's17:on_lv' => $on_lv, + 's18:drbd_volume' => $drbd_volume, + 's19:max_free_space' => $max_free_space." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space}).")", }}); - print "- Target: [".$device_target."], boot: [".$say_boot."], path: [".$device_path."], cache: [".$driver_cache."], driver type: [".$driver_type."]\n"; + #print "- Target: [".$device_target."], boot: [".$say_boot."], path: [".$device_path."], cache: [".$driver_cache."], driver type: [".$driver_type."]\n"; + print "- Target: [".$device_target."], boot: [".$say_boot."], path: [".$device_path."], Available space: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space})."]\n"; +=cut my $volume = ""; print "Sub-Nodes:\n"; @@ -318,9 +350,6 @@ sub manage_disk foreach my $drbd_resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$short_host_name}{$server_name}{drbd}{resource}}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_resource => $drbd_resource }}); - - # Get the DRBD volume data - load_drbd_data($anvil, $drbd_resource); } foreach my $host_type ("node", "dr") { @@ -351,15 +380,293 @@ sub manage_disk } } } +=cut # What are we doing? if ($anvil->data->{switches}{grow}) { + # Are they asking for an available amount of space? + my $error_note = q| +[ Note ] - The size can be in percent, ie: '50%' or '100%', a number in bytes, or a human-readable size. + - Human readable sizes must NOT have a space between the number and letter suffix. Also, base2 + - vs base10 notation! Ie: '1GiB' = 1,073,741,824 bytes', '1GB' == '1,000,000,000 bytes'. A single + - letter used to denote size will be interpreted as base2. ie: '1G == 1GiB'. 
+|; + + my $add_size = $anvil->data->{switches}{grow}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { add_size => $add_size }}); + if ($add_size =~ /^(\d+)%$/) + { + # This is valid + my $percent = ".".$1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { percent => $percent }}); + + $add_size = int($max_free_space * $percent); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { add_size => $add_size }}); + } + elsif ($add_size !~ /\d/) + { + # No digits, probably didn't set a value at all. + print "\n[ Error ] - Please specify the size you would like to grow this disk by. The maximum size is: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space})."].\n"; + print $error_note."\n"; + $anvil->nice_exit({exit_code => 1}); + } + elsif ($add_size !~ /^\d+$/) + { + # Size is not in bytes, try to convert it. + my $bytes = $anvil->Convert->human_readable_to_bytes({ + debug => 2, + size => $add_size, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'bytes' => $bytes }}); + if ($bytes =~ /^\d+$/) + { + $add_size = $bytes; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { add_size => $add_size }}); + } + else + { + # Not a valid size. + print "\n[ Error ] - The requested size: [".$add_size."] could not be interpreted.\n"; + print $error_note."\n"; + $anvil->nice_exit({exit_code => 1}); + } + } + + # Make sure they're asking for a reasonable size + if ($add_size < 4194304) + { + # Must be a typo, this is less than the size of a single extent. + print "\n[ Error ] - The requested size: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $add_size})."] is too small, it's less than an single extent.\n"; + print $error_note."\n"; + $anvil->nice_exit({exit_code => 1}); + } + elsif ($add_size > $max_free_space) + { + # Not enough space. 
+ print "\n[ Error ] - The requested size: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $add_size})."] is too large. The available size is: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space})."]\n"; + print $error_note."\n"; + $anvil->nice_exit({exit_code => 1}); + } + ### TODO: Make this work without the peer node being online. # The server is allowed to be running, but both nodes and any DR hosts this is replicating to # needs to be online. + my $all_online = check_drbd_peer_access($anvil, $from_source, $drbd_volume); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_online => $all_online }}); + + if (not $all_online) + { + print "\n[ Error ] - Growing the storage requires all peers to be online.\n"; + foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{peer}}) + { + my $say_access = $anvil->data->{peer}{$short_host_name}{access_ip} ? "up." : "down!"; + print " - Peer: [".$short_host_name."] is ".$say_access."\n"; + } + $anvil->nice_exit({exit_code => 1}); + } + + # Still here? We're good to go. + my $lv_command_size = 0; + my $hr_size = $anvil->Convert->bytes_to_human_readable({'bytes' => $add_size}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hr_size => $hr_size }}); + if ($add_size eq "100%") + { + # This is valid + $add_size = "-l +100\%FREE"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { add_size => $add_size }}); + } + else + { + $hr_size =~ s/\s+//g; + $add_size = "-L +".$hr_size; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { add_size => $add_size }}); + } + + print "- Preparing to grow the storage by: [".$hr_size."]...\n"; + if (not $anvil->data->{switches}{confirm}) + { + print $anvil->Words->string({key => "message_0059"})." 
"; + my $answer = ; + chomp($answer); + if ($answer !~ /^y/i) + { + print "Aborting.\n"; + $anvil->nice_exit({exit_code => 0}); + } + + # Test that we've lost access while waiting for the answer. + my $all_online = check_drbd_peer_access($anvil, $from_source, $drbd_volume); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_online => $all_online }}); + + if (not $all_online) + { + print "\n[ Error ] - It would appear that we've lost access to a peer while waiting for the answer.\n"; + foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{peer}}) + { + my $say_access = $anvil->data->{peer}{$short_host_name}{access_ip} ? "up." : "down!"; + print " - Peer: [".$short_host_name."] is ".$say_access."\n"; + } + $anvil->nice_exit({exit_code => 1}); + } + } + + foreach my $host_type ("node", "dr") + { + foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}}) + { + my $host_uuid = $anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}{$short_host_name}{host_uuid}; + my $backing_disk = $anvil->data->{new}{resource}{$drbd_resource}{host_uuid}{$host_uuid}{volume_number}{$drbd_volume}{backing_disk}; + my $shell_call = $anvil->data->{path}{exe}{lvextend}." ".$add_size." 
".$backing_disk; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:short_host_name' => $short_host_name, + 's2:host_uuid' => $host_uuid, + 's3:backing_disk' => $backing_disk, + 's4:shell_call' => $shell_call, + }}); + if ($host_uuid eq $anvil->Get->host_uuid) + { + print " - Extending local LV: [".$backing_disk."]..."; + my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code) + { + print " Error!\n"; + print "[ FAILED ] - When trying to grow the local logical volume: [".$backing_disk."]\n"; + print "[ FAILED ] - using the command: [".$shell_call."]\n"; + print "[ FAILED ] - The return code: [".$return_code."] was received, expected '0'. Output, if any:\n"; + print "==========\n"; + print $output."\n"; + print "==========\n"; + print "The extension of the resource is incomplete, manual intervention is required!!\n"; + print "[ Warning ] - Do NOT re-run this command! The backing devices may not have mis-matched sized!\n"; + $anvil->nice_exit({exit_code => 1}); + } + else + { + print " Done!\n"; + } + } + else + { + my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip}; + my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network}; + print " - Extending peer: [".$short_host_name.":".$backing_disk."], via: [".$use_ip." 
(".$use_network.")]"; + my ($output, $error, $return_code) = $anvil->Remote->call({ + shell_call => $shell_call, + target => $use_ip, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + if ($return_code) + { + print " Error!\n"; + print "[ FAILED ] - When trying to grow the peer's logical volume: [".$backing_disk."]\n"; + print "[ FAILED ] - using the command: [".$shell_call."]\n"; + print "[ FAILED ] - The return code: [".$return_code."] was received, expected '0'. Output, if any:\n"; + print "==] STDOUT [========\n"; + print $output."\n"; + print "==] STDERR [========\n"; + print $error."\n"; + print "====================\n"; + print "The extension of the resource is incomplete, manual intervention is required!!\n"; + print "[ Warning ] - Do NOT re-run this command! The backing devices may not have mis-matched sized!\n"; + $anvil->nice_exit({exit_code => 1}); + } + else + { + print " Done!\n"; + } + } + } + } - # Process the DRBD config to get the backing LVs. + # Locally, we'll call DRBD to resize. + print "- Extending backing devices complete. Now extending DRBD resource/volume... "; + my $shell_call = $anvil->data->{path}{exe}{drbdadm}." 
resize ".$drbd_resource."/".$drbd_volume; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code) + { + print " Error!\n"; + print "[ FAILED ] - When trying to grow the DRBD device: [".$drbd_resource."/".$drbd_volume."]\n"; + print "[ FAILED ] - using the command: [".$shell_call."]\n"; + print "[ FAILED ] - The return code: [".$return_code."] was received, expected '0'. Output, if any:\n"; + print "==========\n"; + print $output."\n"; + print "==========\n"; + print "The extension of the resource is incomplete, manual intervention is required!!\n"; + print "[ Note ] - All backing devices have been grown. Manually resolving the drbd grow\n"; + print "[ Note ] - error should complete the drive expansion!\n"; + $anvil->nice_exit({exit_code => 1}); + } + else + { + print " Done!\n"; + } + + # Call scan-lvm and scan-drbd to make sure the databases are updated. 
+ print "- Calling scancore agents to ensure the database has the new storage config recorded.\n"; + foreach my $agent ("scan-drbd", "scan-lvm") + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0740", variables => { agent => $agent }}); + + my $shell_call = $anvil->data->{path}{directories}{scan_agents}."/".$agent."/".$agent.$anvil->Log->switches(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + foreach my $host_type ("node", "dr") + { + foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}}) + { + my $host_uuid = $anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}{$short_host_name}{host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:short_host_name' => $short_host_name, + 's2:host_uuid' => $host_uuid, + }}); + if ($host_uuid eq $anvil->Get->host_uuid) + { + print " - Running scan agent: [".$agent."] locally..."; + my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + print " Done!\n"; + } + else + { + my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip}; + my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network}; + print " - Running scan agent: [".$agent."] on: [".$short_host_name."] via: [".$use_ip." 
(".$use_network.")]..."; + my ($output, $error, $return_code) = $anvil->Remote->call({ + shell_call => $shell_call, + target => $use_ip, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + print " Done!\n"; + } + } + } + } + + print "[ Success ] - Expansion is complete!\n"; + print "[ Note ] - Depending on your OS, you may need to power the server off, and then power it back on\n"; + print "[ Note ] - for the new space to be visible. Typically, powering off the server from the guest OS\n"; + print "[ Note ] - and waiting for the Anvil! to boot it back up will do the job nicely.\n"; } return(0); @@ -406,6 +713,21 @@ sub manage_optical my $device_target = $anvil->data->{switches}{optical}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { device_target => $device_target }}); + if ($anvil->data->{switches}{optical} eq "#!SET!#") + { + # User didn't specify a device. 
+ show_server_details($anvil); + print "\n[ Error ] - Please specify the optical disk target you want to work on.\n"; + $anvil->nice_exit({exit_code => 1}); + } + elsif (not exists $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{cdrom}{target}{$device_target}) + { + # Invalid device target + show_server_details($anvil); + print "\n[ Error ] - The optical device target: [".$device_target."] wasn't found.\n"; + $anvil->nice_exit({exit_code => 1}); + } + my $eject_first = 0; my $alias = $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{cdrom}{target}{$device_target}{alias}; my $boot_order = $anvil->data->{server}{$short_host_name}{$server_name}{$from_source}{device}{cdrom}{target}{$device_target}{boot_order}; @@ -466,9 +788,25 @@ sub manage_optical { print "- Inserting: [".$anvil->data->{switches}{insert}."] into the drive: [".$device_target."].\n"; } + elsif ($anvil->data->{switches}{eject}) + { + print "- Ejecting: [".$device_path."] from: [".$device_target."].\n"; + } else { - print "- Ejecting: [".$anvil->data->{switches}{insert}."] from: [".$device_target."].\n"; + # Show the ISO in the drive. 
+ if ($device_path) + { + print "- Drive: [".$device_target."] has the disc image: [".$device_path."] inserted currently.\n"; + print "- Use '--eject' to eject the disc.\n"; + print "- Use '--insert /mnt/shared/files/' to replace it with a different disc image.\n"; + } + else + { + print "- Drive: [".$device_target."] currently has no disc image inserted.\n"; + print "- Use '--insert /mnt/shared/files/' to insert a disc image.\n"; + } + $anvil->nice_exit({exit_code => 0}); } my ($output, $return_code) = $anvil->System->call({debug => 2, shell_call => $shell_call}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { @@ -600,6 +938,7 @@ sub show_server_details my $lv_name = $anvil->data->{lvm}{host_name}{$short_host_name}{lv_path}{$on_lv}{scan_lvm_lv_name}; my $lv_size = $anvil->data->{lvm}{host_name}{$short_host_name}{lv}{$lv_name}{scan_lvm_lv_size}; my $metadata_size = $lv_size - $resource_size; + my $max_free_space = get_max_free_space($anvil, $drbd_resource, $drbd_volume); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 's01:address_domain' => $address_domain, 's02:address_slot' => $address_slot, @@ -614,15 +953,12 @@ sub show_server_details 's11:resource_size' => $anvil->Convert->bytes_to_human_readable({'bytes' => $resource_size})." (".$anvil->Convert->add_commas({number => $resource_size}).")", 's12:lv_size' => $anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size})."] (".$anvil->Convert->add_commas({number => $lv_size}).")", 's13:metadata_size' => $anvil->Convert->bytes_to_human_readable({'bytes' => $metadata_size})."] (".$anvil->Convert->add_commas({number => $metadata_size}).")", + 's14:max_free_space' => $max_free_space." 
(".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space}).")", }}); print "- Target: [".$device_target."], boot: [".$say_boot."], Replication Volume: [".$drbd_resource."/".$drbd_volume."]\n"; - print " - Resource / LV / Metadata sizes: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $resource_size})." / ".$anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size})." / ".$anvil->Convert->bytes_to_human_readable({'bytes' => $metadata_size})."], cache: [".$driver_cache."], IO Policy: [".$driver_io."]\n"; - - # Get the backing LV from the DRBD resource. - - - #if ($anvil->data->{storage_groups}{vg_uuid}{$vg_uuid}{storage_group_uuid}) + print " |- Resource / LV / Metadata sizes: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $resource_size})." / ".$anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size})." / ".$anvil->Convert->bytes_to_human_readable({'bytes' => $metadata_size})."], free space: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space})."]\n"; + #print " |- Resource / LV / Metadata sizes: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $resource_size})." / ".$anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size})." 
/ ".$anvil->Convert->bytes_to_human_readable({'bytes' => $metadata_size})."], free space: [".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space})."], cache: [".$driver_cache."], IO Policy: [".$driver_io."]\n";
 		}
 		else
 		{
@@ -660,6 +996,152 @@ sub show_server_details
 	return(0);
 }
 
+# This checks that every DRBD peer host can be reached over the network.
+#
+# Each host under 'drbd::drbd_node' other than the local host is tested with 'Remote->test_access' on the
+# networks found in 'network_access', trying names starting with 'bcn', 'mn', 'ifn' then 'sn', in that
+# order. The first network that works is recorded under 'peer::<short_host_name>::access::ip' and
+# '...::access::network' (used for the remote calls), and mirrored in '...::access_ip' and
+# '...::access_network' (used when reporting which peers are up). All four are blank if the peer could
+# not be reached.
+#
+# Parameters;
+# - $anvil:         The Anvil::Tools handle.
+# - $drbd_resource: The DRBD resource name. It is only logged here.
+# - $drbd_volume:   The DRBD volume number. It is only logged here.
+#
+# Returns '1' if every peer was reachable, '0' if any peer was not.
+sub check_drbd_peer_access
+{
+	my ($anvil, $drbd_resource, $drbd_volume) = @_;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		's01:drbd_resource' => $drbd_resource,
+		's02:drbd_volume'   => $drbd_volume,
+	}});
+
+	my $all_online = 1;
+	foreach my $this_host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{drbd_node}})
+	{
+		my $host_uuid       = $anvil->Get->host_uuid_from_name({debug => 2, host_name => $this_host});
+		my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+			's1:this_host'       => $this_host,
+			's2:host_uuid'       => $host_uuid,
+			's3:short_host_name' => $short_host_name,
+		}});
+		next if $host_uuid eq $anvil->Get->host_uuid;
+
+		# These store the IP and network we used to access the peer. They are blanked first so that a
+		# peer that has gone away since an earlier call is not reported using stale values. Peers left
+		# blank are the ones that are not available.
+		$anvil->data->{peer}{$short_host_name}{access_ip}       = "";
+		$anvil->data->{peer}{$short_host_name}{access_network}  = "";
+		$anvil->data->{peer}{$short_host_name}{access}{ip}      = "";
+		$anvil->data->{peer}{$short_host_name}{access}{network} = "";
+
+		my $access  = 0;
+		my $matches = $anvil->Network->find_access({
+			debug  => 2,
+			target => $this_host,
+		});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
+		foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
+		{
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
+			foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
+			{
+				next if $access;
+				next if $network_name !~ /^$preferred_network/;
+				my $target_ip   = $anvil->data->{network_access}{$network_name}{target_ip_address};
+				my $test_access = $anvil->Remote->test_access({target => $target_ip});
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+					's1:network_name' => $network_name,
+					's2:target_ip'    => $target_ip,
+					's3:test_access'  => $test_access,
+				}});
+
+				if ($test_access)
+				{
+					# We're good. Record the result under both sets of keys that the callers read.
+					$access = 1;
+					$anvil->data->{peer}{$short_host_name}{access}{ip}      = $target_ip;
+					$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name;
+					$anvil->data->{peer}{$short_host_name}{access_ip}       = $target_ip;
+					$anvil->data->{peer}{$short_host_name}{access_network}  = $network_name;
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+						's1:access'                                    => $access,
+						"s2:peer::${short_host_name}::access::ip"      => $anvil->data->{peer}{$short_host_name}{access}{ip},
+						"s3:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network},
+					}});
+				}
+			}
+		}
+
+		if (not $access)
+		{
+			$all_online = 0;
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_online => $all_online }});
+		}
+	}
+
+	return($all_online);
+}
+
+# This returns how much space, in bytes, is available to grow the given DRBD volume.
+#
+# For each host under 'drbd::drbd_node', the volume's backing LV is looked up, then the VG that LV is
+# on, then that VG's free space. The smallest free space found is returned, as the backing LV is grown
+# by the same amount on each host. A host with no free space recorded is treated as having none.
+#
+# Parameters;
+# - $anvil:         The Anvil::Tools handle.
+# - $drbd_resource: The DRBD resource name.
+# - $drbd_volume:   The DRBD volume number.
+#
+# Returns the free space in bytes. This is '0' if any host has no free space, or if no hosts were found.
+sub get_max_free_space
+{
+	my ($anvil, $drbd_resource, $drbd_volume) = @_;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		's01:drbd_resource' => $drbd_resource,
+		's02:drbd_volume'   => $drbd_volume,
+	}});
+
+	# 'undef' means no host has been checked yet. '0' can't mark that, as it's a valid amount of free space.
+	my $max_free_space = undef;
+	foreach my $this_host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{drbd_node}})
+	{
+		my $drbd_path        = $anvil->data->{drbd}{drbd_node}{$this_host}{config}{resource}{$drbd_resource}{volume}{$drbd_volume}{drbd_path};
+		my $drbd_path_by_res = $anvil->data->{drbd}{drbd_node}{$this_host}{config}{resource}{$drbd_resource}{volume}{$drbd_volume}{drbd_path_by_res};
+		my $backing_lv       = $anvil->data->{drbd}{drbd_node}{$this_host}{config}{resource}{$drbd_resource}{volume}{$drbd_volume}{backing_lv};
+		my $lv_name          = $anvil->data->{lvm}{host_name}{$this_host}{lv_path}{$backing_lv}{scan_lvm_lv_name};
+		my $on_vg            = $anvil->data->{lvm}{host_name}{$this_host}{lv}{$lv_name}{scan_lvm_lv_on_vg};
+		my $vg_free_space    = $anvil->data->{lvm}{host_name}{$this_host}{vg}{$on_vg}{scan_lvm_vg_free};
+		# If the free space isn't recorded, count it as none instead of comparing against 'undef'.
+		$vg_free_space = 0 if not defined $vg_free_space;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+			's01:this_host'        => $this_host,
+			's02:drbd_path'        => $drbd_path,
+			's03:drbd_path_by_res' => $drbd_path_by_res,
+			's04:backing_lv'       => $backing_lv,
+			's05:on_vg'            => $on_vg,
+			's06:vg_free_space'    => $vg_free_space." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $vg_free_space}).")",
+		}});
+		if ((not defined $max_free_space) or ($vg_free_space < $max_free_space))
+		{
+			$max_free_space = $vg_free_space;
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+				max_free_space => $max_free_space." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_free_space}).")",
+			}});
+		}
+	}
+
+	# If no hosts were found at all, report no free space so that callers always get a number back.
+	$max_free_space = 0 if not defined $max_free_space;
+
+	return($max_free_space);
+}
+
 sub load_drbd_data
 {
 	my ($anvil, $drbd_resource) = @_;
@@ -735,7 +1217,7 @@ sub show_volume
 {
 	my ($anvil, $drbd_resource, $host_type) = @_;
 
-	my $anvil_uuid = defined $anvil->data->{switches}{anvil_uuid} ? $anvil->data->{switches}{anvil_uuid} : "";
+	my $anvil_uuid = defined $anvil->data->{switches}{anvil_uuid} ? $anvil->data->{switches}{anvil_uuid} : $anvil->Cluster->get_anvil_uuid();
 	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
 	foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}})
 	{
@@ -808,7 +1290,7 @@ sub show_volume
 
 			if ($storage_group_uuid)
 			{
-				my $vg_size = $anvil->data->{anvil_resources}{$anvil_uuid}{storage_group}{$storage_group_uuid}{vg_size};
+				my $vg_size   = $anvil->data->{anvil_resources}{$anvil_uuid}{storage_group}{$storage_group_uuid}{vg_size};
 				my $free_size = $anvil->data->{anvil_resources}{$anvil_uuid}{storage_group}{$storage_group_uuid}{free_size};
 			}
 		}
diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server
index dc9e9143..8bb55cfa 100755
--- a/tools/anvil-provision-server
+++ b/tools/anvil-provision-server
@@ -1200,7 +1200,7 @@ sub create_md
 	return(0);
 }
 
-# This finds which SN network and IPs we're using.
+# This finds which SN (or BCN, IFN) network and IPs we're using.
sub get_sn_details
 {
 	my ($anvil) = @_;
@@ -1208,62 +1208,76 @@ sub get_sn_details
 	$anvil->data->{job}{node1_sn_ip} = "";
 	$anvil->data->{job}{node2_sn_ip} = "";
 	$anvil->data->{job}{sn_network}  = "";
-	$anvil->Network->load_ips({
-		debug     => 2,
-		host      => $anvil->data->{job}{node1_short_host_name},
-		host_uuid => $anvil->data->{job}{node1_host_uuid},
-	});
-	$anvil->Network->load_ips({
-		debug     => 2,
-		host      => $anvil->data->{job}{node2_short_host_name},
-		host_uuid => $anvil->data->{job}{node2_host_uuid},
-	});
-	my $match = $anvil->Network->find_matches({
-		debug  => 2,
-		first  => $anvil->data->{job}{node1_short_host_name},
-		second => $anvil->data->{job}{node2_short_host_name},
-		source => $THIS_FILE,
-		line   => __LINE__,
-	});
-	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { match => $match }});
-	$anvil->data->{job}{sn_network}  = "";
-	$anvil->data->{job}{node1_sn_ip} = "";
-	$anvil->data->{job}{node2_sn_ip} = "";
+	# Gather both nodes' names and their own host UUIDs, plus the peer's short name.
 	my $node1_short_host_name = $anvil->data->{job}{node1_short_host_name};
+	my $node1_host_uuid       = $anvil->data->{job}{node1_host_uuid};
 	my $node2_short_host_name = $anvil->data->{job}{node2_short_host_name};
-	my $matched_ips           = keys %{$match};
-	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matched_ips => $matched_ips }});
-	foreach my $interface (sort {$a cmp $b} keys %{$match->{$node1_short_host_name}})
+	my $node2_host_uuid       = $anvil->data->{job}{node2_host_uuid};
+	my $peer_short_name       = $anvil->data->{job}{peer_short_name};
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		"s1:node1_short_host_name" => $node1_short_host_name,
+		"s2:node1_host_uuid"       => $node1_host_uuid,
+		"s3:node2_short_host_name" => $node2_short_host_name,
+		"s4:node2_host_uuid"       => $node2_host_uuid,
+		"s5:peer_short_name"       => $peer_short_name,
+	}});
+
+	# Look for a match on the SN, and failing that the BCN, and finally the IFN. We don't check the MN.
+ my $matches = $anvil->Network->find_access({ + debug => 2, + target => $peer_short_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }}); + foreach my $preferred_network ("sn") { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { interface => $interface }}); - if ($interface =~ /sn/) + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }}); + foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}}) { - ### TODO: This always chooses SN1 at this time, we need to support (later) VM - ### build-time SN selection when 2+ SNs exist. - # Valid data? - if ((not $match->{$node1_short_host_name}{$interface}{ip}) or (not $match->{$node2_short_host_name}{$interface}{ip})) + next if $network_name !~ /^$preferred_network/; + + # Network and link speed + $anvil->data->{job}{sn_network} = uc($network_name); + $anvil->data->{job}{network_speed} = $anvil->data->{network_access}{$network_name}{local_speed} < $anvil->data->{network_access}{$network_name}{target_speed} ? $anvil->data->{network_access}{$network_name}{local_speed} : $anvil->data->{network_access}{$network_name}{target_speed}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:job::sn_network" => $anvil->data->{job}{sn_network}, + "s2:job::network_speed" => $anvil->data->{job}{network_speed}, + }}); + if ($node1_host_uuid eq $anvil->Get->host_uuid) + { + # We're node 1 + $anvil->data->{job}{node1_sn_ip} = $anvil->data->{network_access}{$network_name}{local_ip_address} ? $anvil->data->{network_access}{$network_name}{local_ip_address} : ""; + $anvil->data->{job}{node2_sn_ip} = $anvil->data->{network_access}{$network_name}{target_ip_address} ? 
$anvil->data->{network_access}{$network_name}{target_ip_address} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:job::node1_sn_ip" => $anvil->data->{job}{node1_sn_ip}, + "s2:job::node2_sn_ip" => $anvil->data->{job}{node2_sn_ip}, + }}); + } + else + { + # We're node 2 + $anvil->data->{job}{node1_sn_ip} = $anvil->data->{network_access}{$network_name}{target_ip_address} ? $anvil->data->{network_access}{$network_name}{target_ip_address} : ""; + $anvil->data->{job}{node2_sn_ip} = $anvil->data->{network_access}{$network_name}{local_ip_address} ? $anvil->data->{network_access}{$network_name}{local_ip_address} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s1:job::node1_sn_ip" => $anvil->data->{job}{node1_sn_ip}, + "s2:job::node2_sn_ip" => $anvil->data->{job}{node2_sn_ip}, + }}); + } + + if ((not $anvil->data->{job}{node1_sn_ip}) or (not $anvil->data->{job}{node2_sn_ip})) { - # Probably a bug, maybe a broken /etc/hosts file? $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0417", variables => { node1_name => $node1_short_host_name, node2_name => $node2_short_host_name, - interface => $interface, - node1_ip => defined $match->{$node1_short_host_name}{$interface}{ip} ? $match->{$node1_short_host_name}{$interface}{ip} : "", - node2_ip => defined $match->{$node2_short_host_name}{$interface}{ip} ? $match->{$node2_short_host_name}{$interface}{ip} : "", + node1_ip => $anvil->data->{job}{node1_sn_ip}, + node2_ip => $anvil->data->{job}{node2_sn_ip}, }}); next; } - - # Found an SN. 
- $anvil->data->{job}{sn_network} = uc(($interface =~ /^(sn\d+)_/)[0]); - $anvil->data->{job}{node1_sn_ip} = $match->{$node1_short_host_name}{$interface}{ip}; - $anvil->data->{job}{node2_sn_ip} = $match->{$node2_short_host_name}{$interface}{ip}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - sn_network => $anvil->data->{job}{sn_network}, - node1_sn_ip => $anvil->data->{job}{node1_sn_ip}, - node2_sn_ip => $anvil->data->{job}{node2_sn_ip}, - }}); + else + { + last; + } } } diff --git a/tools/anvil-safe-start b/tools/anvil-safe-start index d04e7ffe..6ec7f671 100755 --- a/tools/anvil-safe-start +++ b/tools/anvil-safe-start @@ -16,6 +16,8 @@ # - Make this work on DR hosts. # - 'pcs quorum unblock' could be useful in sole-survivor cold starts. # - Start DRBD resources if the VMs are running already on the peer. +# - Check that the installed kernel-headers matches the running kernel and, if not, check with grubby to +# ensure the right kernel is set to boot. Then alert the user to a likely need to reboot. # use strict; diff --git a/tools/watch_drbd b/tools/anvil-watch-drbd similarity index 73% rename from tools/watch_drbd rename to tools/anvil-watch-drbd index b1f66049..02a15cf1 100755 --- a/tools/watch_drbd +++ b/tools/anvil-watch-drbd @@ -2,22 +2,83 @@ use strict; use warnings; +use Anvil::Tools; +use Data::Dumper; +use Text::Diff; use Term::Cap; +use Time::Local; -my $t = Term::Cap->Tgetent; +$| = 1; -my $root_directory = "/sys/kernel/debug/drbd/resources/"; -while(1) +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) { - system('clear'); - print $t->Tgoto("cm", 0, 0); + $running_directory =~ s/^\./$ENV{PWD}/; +} + +my $anvil = Anvil::Tools->new(); + +# Get a list of all interfaces with IP addresses. 
+$anvil->Get->switches({debug => 2, list => ["watch"]}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}}); + +our $t = Term::Cap->Tgetent; + +# One shot or continuous? +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { + 'switches::watch' => $anvil->data->{switches}{watch}, +}}); +if ($anvil->data->{switches}{watch}) +{ + # Do we have an interval? + my $interval = 2; + if ($anvil->data->{switches}{watch} =~ /^\d+$/) + { + $interval = $anvil->data->{switches}{watch}; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { interval => $interval }}); + + # Loop until terminated. + while(1) + { + show_status($anvil); + sleep $interval; + } +} +else +{ + # Once and exit. + show_status($anvil); +} + +$anvil->nice_exit({exit_code => 0}); + + +sub show_status +{ + my ($anvil) = @_; + + if ($anvil->data->{switches}{watch}) + { + system('clear'); + print $t->Tgoto("cm", 0, 0); + } + + if ($anvil->data->{switches}{watch}) + { + my $date = $anvil->Get->date_and_time(); + print "-=] Updated: ".$date." - Press ' + ' to exit\n"; + } + my $vms = {}; my $longest_resource = 3; # Res my $longest_connection = 2; # To my $longest_volume = 3; # Vol my $total_transfer = 0; + my $root_directory = $anvil->data->{path}{directories}{resource_status}; local(*DIRECTORY); opendir(DIRECTORY, $root_directory); while(my $file = readdir(DIRECTORY)) @@ -155,5 +216,4 @@ while(1) my $say_speed = $total_transfer / 1024; $say_speed =~ s/^(\d+\.\d{3})\d+/$1/; print "* Total transfer speed is about: [".$say_speed." MiB/sec]\n"; - sleep 2; }