From 62fe62a44b6ab6fe0028d56a739d043bffa4ba00 Mon Sep 17 00:00:00 2001 From: digimer Date: Mon, 11 Sep 2023 17:45:16 -0400 Subject: [PATCH 01/10] * Continued work on anvil-manage-server-system. It now displays the boot devices, CPU and RAM info. Signed-off-by: digimer --- man/anvil-manage-server-storage.8 | 2 +- man/anvil-manage-server-system.8 | 38 +++ share/words.xml | 2 + tools/anvil-manage-server-system | 486 ++++++++++++++++++++++++++++++ 4 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 man/anvil-manage-server-system.8 create mode 100755 tools/anvil-manage-server-system diff --git a/man/anvil-manage-server-storage.8 b/man/anvil-manage-server-storage.8 index d7c9c79c..2e2c23f7 100644 --- a/man/anvil-manage-server-storage.8 +++ b/man/anvil-manage-server-storage.8 @@ -42,7 +42,7 @@ This is the disk being worked on. For optical disks, it's the drive that an opti When not specified, if only one disk exists, it will be chosen automatically. .TP \fB\-\-eject\fR -This ejects the optical disc (ISO) in the drive specified by \fB\-\-disk\fR. +This ejects the optical disc (ISO) in the drive specified by \fB\-\-optical\fR. .TP \fB\-\-job\-uuid\fR This is the jobs -> job_uuid to execute. Generally this is only used by other programs. diff --git a/man/anvil-manage-server-system.8 b/man/anvil-manage-server-system.8 new file mode 100644 index 00000000..70ff1629 --- /dev/null +++ b/man/anvil-manage-server-system.8 @@ -0,0 +1,38 @@ +.\" Manpage for the Anvil! server system manager +.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. +.TH anvil-manage-server-system "8" "August 30 2023" "Anvil! Intelligent Availability™ Platform" +.SH NAME +anvil-manage-server-system \- Tool used to manage the system configuration of a hosted server. 
+.SH SYNOPSIS +.B anvil-manage-server-system +\fI\, \/\fR[\fI\,options\/\fR] +.SH DESCRIPTION +anvil-manage-server-system \- This tool is used to manage various system configuration components of hosted servers. Storage is NOT managed here, see 'anvil-manage-server-storage' for that. +.TP +When called without switches, the list of servers that can be worked on will be displayed. +.TP +.SH OPTIONS +.TP +\-?, \-h, \fB\-\-help\fR +Show this man page. +.TP +\fB\-\-log-secure\fR +When logging, record sensitive data, like passwords. +.TP +\-v, \-vv, \-vvv +Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. +.SS "Commands:" +.TP +\fB\-\-\fR + +.TP +\fB\-\-job\-uuid\fR +This is the jobs -> job_uuid to execute. Generally this is only used by other programs. +.TP +\fB\-\-\fR + +.IP +.SH AUTHOR +Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. +.SH "REPORTING BUGS" +Report bugs to users@clusterlabs.org diff --git a/share/words.xml b/share/words.xml index 8b851e10..e9fc241f 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1149,6 +1149,7 @@ resource #!variable!server!# { On Battery Estimated Runtime Last Updated + Optical Disc Configure Network @@ -3663,6 +3664,7 @@ Here we will inject 't_0006', which injects 't_0001' which has a variable: [#!st - Server is crashed! - Server is suspended. - Server is in an unknown state (int: [#!variable!state!#]). + ]]> @@ -3933,6 +3940,8 @@ We will try to proceed anyway. 
#!variable!error!# ==== + + diff --git a/tools/anvil-manage-server-system b/tools/anvil-manage-server-system index e68915e7..d1502c4a 100755 --- a/tools/anvil-manage-server-system +++ b/tools/anvil-manage-server-system @@ -271,12 +271,6 @@ sub manage_boot_menu my $new_definition = ""; my $in_os = 0; my $bootmenu_seen = 0; -=cut - - hvm - - -=cut foreach my $line (split/\n/, $anvil->data->{servers}{server_uuid}{$server_uuid}{server_definition_xml}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); @@ -289,23 +283,14 @@ sub manage_boot_menu { if ($line =~ //) { - my $old_value = $1; - $bootmenu_seen = 1; + my $old_value = $1; + $bootmenu_seen = 1; + $line =~ s///; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { old_value => $old_value, bootmenu_seen => $bootmenu_seen, + line => $line, }}); - if ($old_value eq $anvil->data->{switches}{'boot-menu'}) - { - # Update not needed. - print "The boot menu is already: [".$anvil->data->{switches}{'boot-menu'}."], update not needed.\n"; - } - else - { - # Update is needed. - $line =~ s///; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); - } } if ($line =~ /<\/os>/) { @@ -322,21 +307,13 @@ sub manage_boot_menu } $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_definition => $new_definition }}); - my $difference = diff \$anvil->data->{servers}{server_uuid}{$server_uuid}{server_definition_xml}, \$new_definition, { STYLE => 'Unified' }; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { difference => $difference }}); - - if ($difference) - { - # Test parse it. 
- my $problem = $anvil->Server->parse_definition({ - debug => 2, - host => $short_host_name, - server => $server_name, - source => "test", - definition => $new_definition, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - } + # Always call this, as a previous run may have only updated some definitions. + my $problem = $anvil->Server->update_definition({ + debug => 2, + server => $server_uuid, + new_definition_xml => $new_definition, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); } else { From f12e001ac2ff0d591d48906cfae6d60a7c047fac Mon Sep 17 00:00:00 2001 From: digimer Date: Thu, 5 Oct 2023 23:49:05 -0400 Subject: [PATCH 05/10] Finished Server->connect_to_virsh(). * Now, connecting to virsh can detect when still-open connections already exist. Signed-off-by: digimer --- Anvil/Tools/Server.pm | 144 ++++++++++++++++++++++++++++++++++++++++++ share/words.xml | 2 + 2 files changed, 146 insertions(+) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index 528225ed..d5015647 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -16,6 +16,7 @@ my $THIS_FILE = "Server.pm"; ### Methods; # active_migrations # boot_virsh +# connect_to_virsh # count_servers # find # find_processes @@ -296,6 +297,149 @@ WHERE return($success); } +=head2 connect_to_libvirt + +This creates a connection to the libvirtd daemon on the target host. The connection to the host will be stored in: + +* libvirtd::::connection + +If the connection succeeds, C<< 0 >> will be returned. If the connection fails, C<< 1 >> will be returned. 
+ +parameters + +=head3 server_name (optional) + +If this is set to the name of a server, that server will be searched for and, if found, the handle to it will be stored in: + +* libvirtd::::server::::connection + +If the server is not found, that will be set to C<< 0 >> + +=head3 target (optional, default is the local short host name) + +This is the target to connect to. + +B<< Note >>: Don't use C<< localhost >>! If you do, it will be changed to the short host name. This is because C<< localhost >> is converted to C<< ::1 >> which can cause connection problems. + +=cut +sub connect_to_libvirt +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Server->connect_to_libvirt()" }}); + + my $server_name = defined $parameter->{server_name} ? $parameter->{server_name} : ""; + my $target = defined $parameter->{target} ? $parameter->{target} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + server_name => $server_name, + target => $target, + }}); + + if ((not $target)or ($target eq "localhost")) + { + # Change to the short host name. + $target = $anvil->Get->short_host_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { target => $target }}); + } + + # Does the handle already exist? + if ((exists $anvil->data->{libvirtd}{$target}) && (ref($anvil->data->{libvirtd}{$target}{connection}) eq "Sys::Virt")) + { + # Is this connection alive? + my $info = $anvil->data->{libvirtd}{$target}{connection}->get_node_info(); + if (ref($info) eq "HASH") + { + # No need to connect. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0814", variables => { target => $target }}); + } + else + { + # Stale connection. 
+ $anvil->data->{libvirtd}{$target}{connection} = ""; + } + } + else + { + $anvil->data->{libvirtd}{$target}{connection} = ""; + } + + # If we don't have a connection, try to establish one now. + if (not $anvil->data->{libvirtd}{$target}{connection}) + { + my $uri = "qemu+ssh://".$target."/system"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uri => $uri }}); + + # Test connect + eval { $anvil->data->{libvirtd}{$target}{connection} = Sys::Virt->new(uri => $uri); }; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "libvirtd::${target}::connection" => $anvil->data->{libvirtd}{$target}{connection}, + }}); + if ($@) + { + # Throw an error, then clear the URI so that we just update the DB/on-disk definitions. + $anvil->data->{libvirtd}{$target}{connection} = 0; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0162", variables => { + host_name => $target, + uri => $uri, + error => $@, + }}); + return(1); + } + } + + if ($server_name) + { + if (ref($anvil->data->{libvirtd}{$target}{server}{$server_name}{connection}) eq "Sys::Virt::Domain") + { + # If this connection still valid? + my $uuid = $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection}->get_uuid_string(); + if ($uuid) + { + # We're good. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0815", variables => { server_name => $server_name }}); + return(0); + } + else + { + # Stale connection. 
+ $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection} = ""; + } + } + else + { + $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection} = ""; + } + + my $domain = ""; + my @domains = $anvil->data->{libvirtd}{$target}{connection}->list_all_domains(); + foreach my $domain_handle (@domains) + { + my $this_server_name = $domain_handle->get_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + domain_handle => $domain_handle, + this_server_name => $this_server_name, + }}); + next if $this_server_name ne $server_name; + + $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection} = $domain_handle; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "libvirtd::${target}::server::${server_name}::connection" => $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection}, + }}); + last; + } + } + + my $return = 0; + if (($server_name) && (not $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection})) + { + # Didn't find the server + return(1) + } + + return(0); +} =head2 count_servers diff --git a/share/words.xml b/share/words.xml index f73f20b3..58dcb931 100644 --- a/share/words.xml +++ b/share/words.xml @@ -2621,6 +2621,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is: - The host: [#!variable!host_name!#] is not configured, skipping it. The file: [#!variable!full_path!#] is in the database multiple times. This could be an artifact from peering Strikers. Selecting an entry to remove... Deleting the 'files' database entry for the file uuid: [#!variable!file_uuid!#]. + There is an existing, functioning connection to: [#!variable!target!#], no need to reconnect. + There is an existing, functioning connection to the server: [#!variable!server_name!#], no need to reconnect. The host name: [#!variable!target!#] does not resolve to an IP address. 
From 829ae546a2416c61b55f9f9f4db45528cd67412b Mon Sep 17 00:00:00 2001 From: digimer Date: Fri, 6 Oct 2023 22:40:08 -0400 Subject: [PATCH 06/10] Beginning work on new Server->locate() method to find servers across an Anvil! cluster. Signed-off-by: digimer --- Anvil/Tools/Server.pm | 194 ++++++++++++++++++++-- ocf/alteeve/server | 1 + scancore-agents/scan-cluster/scan-cluster | 1 + 3 files changed, 186 insertions(+), 10 deletions(-) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index d5015647..3ef87853 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -18,14 +18,15 @@ my $THIS_FILE = "Server.pm"; # boot_virsh # connect_to_virsh # count_servers -# find +# find # To be replaced by Server->locate(); # find_processes # get_definition # get_runtime # get_status +# locate # map_network -# parse_definition # migrate_virsh +# parse_definition # shutdown_virsh # update_definition @@ -313,7 +314,9 @@ If this is set to the name of a server, that server will be searched for and, if * libvirtd::::server::::connection -If the server is not found, that will be set to C<< 0 >> +If the server is not found, that will be set to C<< 0 >>. + +B<< Note >>: This can be set to C<< all >> and all servers we can connect to will be stored. =head3 target (optional, default is the local short host name) @@ -321,6 +324,10 @@ This is the target to connect to. B<< Note >>: Don't use C<< localhost >>! If you do, it will be changed to the short host name. This is because C<< localhost >> is converted to C<< ::1 >> which can cause connection problems. +=head3 target_ip (optional) + +If this is set, when building the URI, this IP or host name is used to connect. This allows the hash to use the C<< target >> name separately. + =cut sub connect_to_libvirt { @@ -332,9 +339,11 @@ sub connect_to_libvirt my $server_name = defined $parameter->{server_name} ? $parameter->{server_name} : ""; my $target = defined $parameter->{target} ? 
$parameter->{target} : ""; + my $target_ip = defined $parameter->{target_ip} ? $parameter->{target_ip} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_name => $server_name, target => $target, + target_ip => $target_ip, }}); if ((not $target)or ($target eq "localhost")) @@ -344,6 +353,12 @@ sub connect_to_libvirt $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { target => $target }}); } + if (not $target_ip) + { + $target_ip = $target; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { target_ip => $target_ip }}); + } + # Does the handle already exist? if ((exists $anvil->data->{libvirtd}{$target}) && (ref($anvil->data->{libvirtd}{$target}{connection}) eq "Sys::Virt")) { @@ -368,7 +383,7 @@ sub connect_to_libvirt # If we don't have a connection, try to establish one now. if (not $anvil->data->{libvirtd}{$target}{connection}) { - my $uri = "qemu+ssh://".$target."/system"; + my $uri = "qemu+ssh://".$target_ip."/system"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uri => $uri }}); # Test connect @@ -389,7 +404,7 @@ sub connect_to_libvirt } } - if ($server_name) + if (($server_name) && ($server_name ne "all")) { if (ref($anvil->data->{libvirtd}{$target}{server}{$server_name}{connection}) eq "Sys::Virt::Domain") { @@ -411,7 +426,11 @@ sub connect_to_libvirt { $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection} = ""; } - + } + + # If we have a server name, or if it's 'all', connect. 
+ if ($server_name) + { my $domain = ""; my @domains = $anvil->data->{libvirtd}{$target}{connection}->list_all_domains(); foreach my $domain_handle (@domains) @@ -421,7 +440,10 @@ sub connect_to_libvirt domain_handle => $domain_handle, this_server_name => $this_server_name, }}); - next if $this_server_name ne $server_name; + if (($server_name ne "all") && ($this_server_name ne $server_name)) + { + next; + } $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection} = $domain_handle; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { @@ -432,7 +454,7 @@ sub connect_to_libvirt } my $return = 0; - if (($server_name) && (not $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection})) + if (($server_name) && ($server_name ne "all") && (not $anvil->data->{libvirtd}{$target}{server}{$server_name}{connection})) { # Didn't find the server return(1) @@ -508,6 +530,7 @@ pmsuspended - The domain has been suspended by guest power management, e.g. ente } +### TODO: Phase this out in favor for Server->locate() =head2 find This will look on the local or a remote machine for the list of servers that are running. @@ -1132,8 +1155,7 @@ sub map_network target => $target, }}); - # NOTE: We don't use 'Server->find' as the hassle of tracking hosts to target isn't worth it. - # Get a list of servers. + ### TODO: Switch to using Server->locate() my $shell_call = $anvil->data->{path}{exe}{setsid}." --wait ".$anvil->data->{path}{exe}{virsh}." list"; my $output = ""; if ($anvil->Network->is_local({host => $target})) @@ -1206,6 +1228,158 @@ sub map_network } +=head2 locate + +B<< Note >>: This is meant to replace C<< Server->find >> and so the hash conflicts. + +This walks through all known and accessible subnodes and DR hosts looking for a server. 
If it's found, it's status on that host are stored in the hash; + +* server_location::host::::access = [0,1] +* server_location::host::::server::::status = +* server_location::host::::server::::active_definition = +* server_location::host::::server::::inactive_definition = + +If the target was not accessible, C<< access >> is set to C<< 0 >>. This is meant to allow telling the difference between "we know there's no servers on that host" versus "we don't know what's there because we couldn't access it". + +If the server is found to be C<< unknown >> or C<< shut off >>, then C<< inactive_definition >> is not set. In all other states, the inactive XML is stored, but often it's the same as the C<< active_definition >>, so the caller is suggested to diff the XMLs when it might be relevant. + +The C<< status >> can be: + +* unknown # The server was found, but it has an unknown state +* running # Server is running. +* blocked # Server is blocked (IO contention?). +* paused # Server is paused (migration target?). +* in shutdown # Server is shutting down. +* shut off # Server is shut off. +* crashed # Server is crashed! +* pmsuspended # Server is suspended. + +If there is a problem, C<< !!error!! >> is returned. If the server is found on at least one host, C<< 0 >> is returned. If the server is not located anywhere, C<< 1 >> is returned. + +C<< Note >>: By design, servers are set to 'undefined' on subnodes, so when the server shuts off, it disappears from libvirtd. This is normal and expected. + +Parameters; + +=head3 server_name (required) + +This is the name of the server being located. It can be set to C<< all >>, in which case all servers on all hosts are located. + +=cut +sub locate +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? 
$parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Server->locate()" }}); + + my $server = defined $parameter->{server} ? $parameter->{server} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + server => $server, + }}); + + if (not $server) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->locate()", parameter => "server" }}); + return(1); + } + + if (exists $anvil->data->{server_location}{host}) + { + delete $anvil->data->{server_location}{host}; + } + + # Connect to all hosts. + $anvil->Database->get_hosts({debug => $debug}); + + foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}}) + { + my $host_uuid = $anvil->data->{sys}{hosts}{by_name}{$host_name}; + my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; + my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name} + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:host_name' => $host_name, + 's2:host_uuid' => $host_uuid, + 's3:host_type' => $host_type, + 's4:short_host_name' => $short_host_name, + }}); + next if $host_type eq "striker"; + + # This will switch to '1' if we connect to libvirtd. + $anvil->data->{server_location}{host}{$short_host_name}{access} = 0; + + # What IP to use? + my $target_ip = $anvil->Network->find_target_ip({ + debug => $debug, + host_uuid => $host_uuid, + networks => "bcn,ifn", # Reduced list to not slow things down with test_access + test_access => 1, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { target_ip => $target_ip }}); + + if ($target_ip) + { + # Try to connect to libvirtd. 
+ my $problem = $anvil->Server->connect_to_libvirt({ + debug => $debug, + target => $short_host_name, + target_ip => $target_ip, + server => $server eq "all" ? "" : $server, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }}); + if (not $problem) + { + # We're connected! If we had a specific server + $anvil->data->{libvirtd}{$short_host_name}{connection} + } + + $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{status} = ""; + $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{active_definition} = ""; + $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{inactive_definition} = ""; + + } + } + + + # Get the inactive XML (changes requested by the user may not match the in-memory XML) + my $virsh_definition_active = $anvil->data->{domain}{$server_name}{handle}->get_xml_description(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { virsh_definition_active => $virsh_definition_active }}); + + my $virsh_definition_inactive = $anvil->data->{domain}{$server_name}{handle}->get_xml_description(Sys::Virt::Domain::XML_INACTIVE); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { virsh_definition_inactive => $virsh_definition_inactive }}); + + my ($anvil, $server_name) = @_; + + ### States: + # 0 = no state + # 1 = running - The domain is currently running on a CPU + # 2 = blocked (idle) - the domain is blocked on resource. This can be caused because the domain is waiting on IO (a traditional wait state) or has gone to sleep because there was nothing else for it to do. + # 3 = paused - The domain has been paused, usually occurring through the administrator running virsh suspend. When in a paused state the domain will still consume allocated resources like memory, but will not be eligible for scheduling by the hypervisor. 
+ # 4 = in shutdown - The domain is in the process of shutting down, i.e. the guest operating system has been notified and should be in the process of stopping its operations gracefully. + # 5 = shut off - The domain is not running. Usually this indicates the domain has been shut down completely, or has not been started. + # 6 = crashed - The domain has crashed, which is always a violent ending. Usually this state can only occur if the domain has been configured not to restart on crash. + # 7 = pmsuspended - The domain has been suspended by guest power management, e.g. entered into s3 state. + my ($state, $reason) = $anvil->data->{domain}{$server_name}{handle}->get_state(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'state' => $state, + reason => $reason, + }}); + + ### Reasons are dependent on the state. + ### See: https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainShutdownReason + my $server_state = "unknown"; + if ($state == 1) { $server_state = "running"; } # Server is running. + elsif ($state == 2) { $server_state = "blocked"; } # Server is blocked (IO contention?). + elsif ($state == 3) { $server_state = "paused"; } # Server is paused (migration target?). + elsif ($state == 4) { $server_state = "in shutdown"; } # Server is shutting down. + elsif ($state == 5) { $server_state = "shut off"; } # Server is shut off. + elsif ($state == 6) { $server_state = "crashed"; } # Server is crashed! + elsif ($state == 7) { $server_state = "pmsuspended"; } # Server is suspended. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_state => $server_state }}); + + return($server_state); +} + =head2 migrate_virsh This will migrate (push or pull) a server from one node to another. If the migration was successful, C<< 1 >> is returned. Otherwise, C<< 0 >> is returned with a (hopefully) useful error being logged. 
diff --git a/ocf/alteeve/server b/ocf/alteeve/server index c8ef3978..acdccb05 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -1080,6 +1080,7 @@ sub start_drbd_resource return(0); } +### TODO: Rework this to use Server->connect_to_libvirt and phase out Server->find(). # This uses the DRBD information to find other peers and see if the server is running on them. sub find_server { diff --git a/scancore-agents/scan-cluster/scan-cluster b/scancore-agents/scan-cluster/scan-cluster index 3d92ed4f..dac1623c 100755 --- a/scancore-agents/scan-cluster/scan-cluster +++ b/scancore-agents/scan-cluster/scan-cluster @@ -205,6 +205,7 @@ sub cib_cleanup return(0); } +### TODO: Rework this to use Server->connect_to_libvirt and phase out Server->find(). # This looks for failed resource and, if found, tries to recover them. sub check_resources { From 55b1380031e2149758328688eb0fa01bd6fc2a80 Mon Sep 17 00:00:00 2001 From: digimer Date: Tue, 10 Oct 2023 19:14:00 -0400 Subject: [PATCH 07/10] Finished (but need more testing) of Server->locate(). This includes the changes in PR#492. Signed-off-by: digimer --- Anvil/Tools/Server.pm | 151 +++++++++++++++++++++++++---------------- share/words.xml | 1 + tools/anvil-join-anvil | 10 ++- 3 files changed, 103 insertions(+), 59 deletions(-) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index 3ef87853..aa134fde 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -1230,18 +1230,21 @@ sub map_network =head2 locate -B<< Note >>: This is meant to replace C<< Server->find >> and so the hash conflicts. +B<< Note >>: This is meant to replace C<< Server->find >>. -This walks through all known and accessible subnodes and DR hosts looking for a server. If it's found, it's status on that host are stored in the hash; +This walks through all known and accessible subnodes and DR hosts looking for a server. If a specific server is searched for and it's found running, the C<< short_host_name >> is returned. 
If there is a problem, C<< !!error!! >> is returned. + +If a specific requested server is found, or is being asked to search for all servers, the following data is stored; * server_location::host::::access = [0,1] * server_location::host::::server::::status = * server_location::host::::server::::active_definition = * server_location::host::::server::::inactive_definition = +* server_location::host::::server::::definition_diff = If the target was not accessible, C<< access >> is set to C<< 0 >>. This is meant to allow telling the difference between "we know there's no servers on that host" versus "we don't know what's there because we couldn't access it". -If the server is found to be C<< unknown >> or C<< shut off >>, then C<< inactive_definition >> is not set. In all other states, the inactive XML is stored, but often it's the same as the C<< active_definition >>, so the caller is suggested to diff the XMLs when it might be relevant. +If the server is found to be C<< running >> or C<< paused >>, then C<< active_definition >> is set and, if there's a difference, that will be stored. In all other states, the inactive XML is stored. The C<< status >> can be: @@ -1256,6 +1259,11 @@ The C<< status >> can be: If there is a problem, C<< !!error!! >> is returned. If the server is found on at least one host, C<< 0 >> is returned. If the server is not located anywhere, C<< 1 >> is returned. +The connection to the host and to the server(s) is cached, for your use; + +* server_location::host::::connection = +* server_location::host::::server::::connection = + C<< Note >>: By design, servers are set to 'undefined' on subnodes, so when the server shuts off, it disappears from libvirtd. This is normal and expected. Parameters; @@ -1273,15 +1281,15 @@ sub locate my $debug = defined $parameter->{debug} ? 
$parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Server->locate()" }}); - my $server = defined $parameter->{server} ? $parameter->{server} : ""; + my $server_name = defined $parameter->{server_name} ? $parameter->{server_name} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - server => $server, + server_name => $server_name, }}); - if (not $server) + if (not $server_name) { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->locate()", parameter => "server" }}); - return(1); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->locate()", parameter => "server_name" }}); + return('!!error!!'); } if (exists $anvil->data->{server_location}{host}) @@ -1289,6 +1297,9 @@ sub locate delete $anvil->data->{server_location}{host}; } + # This will be set if the server is found to be 'running' on a host. + my $server_host = ""; + # Connect to all hosts. $anvil->Database->get_hosts({debug => $debug}); @@ -1296,7 +1307,7 @@ sub locate { my $host_uuid = $anvil->data->{sys}{hosts}{by_name}{$host_name}; my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; - my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name} + my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 's1:host_name' => $host_name, 's2:host_uuid' => $host_uuid, @@ -1321,63 +1332,87 @@ sub locate { # Try to connect to libvirtd. my $problem = $anvil->Server->connect_to_libvirt({ - debug => $debug, - target => $short_host_name, - target_ip => $target_ip, - server => $server eq "all" ? 
"" : $server, + debug => $debug, + target => $short_host_name, + target_ip => $target_ip, + server_name => $server_name, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }}); if (not $problem) { - # We're connected! If we had a specific server - $anvil->data->{libvirtd}{$short_host_name}{connection} + # We're connected! Collect the data on the requested server(s), if applicable. + $anvil->data->{server_location}{host}{$short_host_name}{access} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "server_location::host::${short_host_name}::access" => $anvil->data->{server_location}{host}{$short_host_name}{access}, + }}); + + if ($server_name) + { + my $connection_handle = $anvil->data->{libvirtd}{$short_host_name}{connection}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { connection_handle => $connection_handle }}); + foreach my $this_server_name (sort {$a cmp $b} keys %{$anvil->data->{libvirtd}{$short_host_name}{server}}) + { + if (($server_name eq "all") or ($server_name eq $this_server_name)) + { + my $server_handle = $anvil->data->{libvirtd}{$short_host_name}{server}{$server_name}{connection}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_handle => $server_handle }}); + + # Get the server's state, then convert to a string + my ($state, $reason) = $server_handle->get_state(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 'state' => $state, + reason => $reason, + }}); + + ### Reasons are dependent on the state. + ### See: https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainShutdownReason + my $server_state = "unknown"; + if ($state == 1) { $server_state = "running"; } # Server is running. + elsif ($state == 2) { $server_state = "blocked"; } # Server is blocked (IO contention?). 
+ elsif ($state == 3) { $server_state = "paused"; } # Server is paused (migration target?). + elsif ($state == 4) { $server_state = "in shutdown"; } # Server is shutting down. + elsif ($state == 5) { $server_state = "shut off"; } # Server is shut off. + elsif ($state == 6) { $server_state = "crashed"; } # Server is crashed! + elsif ($state == 7) { $server_state = "pmsuspended"; } # Server is suspended. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_state => $server_state }}); + + # Get the persistent definition + my $inactive_definition = $server_handle->get_xml_description(Sys::Virt::Domain::XML_INACTIVE); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { inactive_definition => $inactive_definition }}); + + # Get the active definition, if applicable. + my $active_definition = ""; + my $definition_diff = ""; + if (($server_state eq "running") or ($server_state eq "paused")) + { + # Get the active definition + $active_definition = $server_handle->get_xml_description(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { active_definition => $active_definition }}); + + # Check for a diff. + $definition_diff = diff \$active_definition, \$inactive_definition, { STYLE => 'Unified' }; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { definition_diff => $definition_diff }}); + } + + # If it's running, record the host. 
+ $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{status} = $server_state; + $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{active_definition} = $active_definition; + $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{inactive_definition} = $inactive_definition; + $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{definition_diff} = $definition_diff; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "server_location::host::${short_host_name}::server::${server_name}::status" => $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{status}, + "server_location::host::${short_host_name}::server::${server_name}::active_definition" => $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{active_definition}, + "server_location::host::${short_host_name}::server::${server_name}::inactive_definition" => $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{inactive_definition}, + "server_location::host::${short_host_name}::server::${server_name}::definition_diff" => $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{definition_diff}, + }}); + } + } + } } - - $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{status} = ""; - $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{active_definition} = ""; - $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{inactive_definition} = ""; - } } - - # Get the inactive XML (changes requested by the user may not match the in-memory XML) - my $virsh_definition_active = $anvil->data->{domain}{$server_name}{handle}->get_xml_description(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { virsh_definition_active => $virsh_definition_active }}); - - my 
$virsh_definition_inactive = $anvil->data->{domain}{$server_name}{handle}->get_xml_description(Sys::Virt::Domain::XML_INACTIVE); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { virsh_definition_inactive => $virsh_definition_inactive }}); - - my ($anvil, $server_name) = @_; - - ### States: - # 0 = no state - # 1 = running - The domain is currently running on a CPU - # 2 = blocked (idle) - the domain is blocked on resource. This can be caused because the domain is waiting on IO (a traditional wait state) or has gone to sleep because there was nothing else for it to do. - # 3 = paused - The domain has been paused, usually occurring through the administrator running virsh suspend. When in a paused state the domain will still consume allocated resources like memory, but will not be eligible for scheduling by the hypervisor. - # 4 = in shutdown - The domain is in the process of shutting down, i.e. the guest operating system has been notified and should be in the process of stopping its operations gracefully. - # 5 = shut off - The domain is not running. Usually this indicates the domain has been shut down completely, or has not been started. - # 6 = crashed - The domain has crashed, which is always a violent ending. Usually this state can only occur if the domain has been configured not to restart on crash. - # 7 = pmsuspended - The domain has been suspended by guest power management, e.g. entered into s3 state. - my ($state, $reason) = $anvil->data->{domain}{$server_name}{handle}->get_state(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - 'state' => $state, - reason => $reason, - }}); - - ### Reasons are dependent on the state. - ### See: https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainShutdownReason - my $server_state = "unknown"; - if ($state == 1) { $server_state = "running"; } # Server is running. 
- elsif ($state == 2) { $server_state = "blocked"; } # Server is blocked (IO contention?). - elsif ($state == 3) { $server_state = "paused"; } # Server is paused (migration target?). - elsif ($state == 4) { $server_state = "in shutdown"; } # Server is shutting down. - elsif ($state == 5) { $server_state = "shut off"; } # Server is shut off. - elsif ($state == 6) { $server_state = "crashed"; } # Server is crashed! - elsif ($state == 7) { $server_state = "pmsuspended"; } # Server is suspended. - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_state => $server_state }}); - - return($server_state); + return($server_host); } =head2 migrate_virsh diff --git a/share/words.xml b/share/words.xml index 58dcb931..c40d8099 100644 --- a/share/words.xml +++ b/share/words.xml @@ -2623,6 +2623,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Deleting the 'files' database entry for the file uuid: [#!variable!file_uuid!#]. There is an existing a functioning connection to: [#!variable!target!#], no need to reconnect. There is an existing a functioning connection to the server: [#!variable!server_name!#], no need to reconnect. + Waiting for: [#!variable!delay!#] seconds. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-join-anvil b/tools/anvil-join-anvil index 5451acfd..17fa14ea 100755 --- a/tools/anvil-join-anvil +++ b/tools/anvil-join-anvil @@ -298,6 +298,9 @@ sub configure_pacemaker my $both_online = 0; until($both_online) { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + 'path::configs::corosync.conf' => $anvil->data->{path}{configs}{'corosync.conf'}, + }}); if (-e $anvil->data->{path}{configs}{'corosync.conf'}) { if (not $start_time) @@ -362,7 +365,12 @@ sub configure_pacemaker # corosync.conf doesn't exist yet. 
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0107"}); } - sleep 5 if not $both_online; + if (not $both_online) + { + my $delay = 5; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0816", variables => { delay => $delay }}); + sleep $delay; + } } } else From 68521cdab7dd91d1c56bed2a789a9d573276b44a Mon Sep 17 00:00:00 2001 From: digimer Date: Wed, 11 Oct 2023 17:19:39 -0400 Subject: [PATCH 08/10] Updated striker-get-screenshots to set permissions properly. This updates the /opt/alteeve/screenshot directories and the screenshots in them to be readable by the WebUI. Signed-off-by: digimer --- Anvil/Tools.pm | 1 + tools/anvil-join-anvil | 2 +- tools/striker-get-screenshots | 42 +++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index 6745afa2..0a37e938 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1115,6 +1115,7 @@ sub _set_paths ifcfg => "/etc/sysconfig/network-scripts", journald => "/var/log/journal", libvirtd_definitions => "/etc/libvirt/qemu/", + opt_alteeve => "/opt/alteeve", pgsql => "/var/lib/pgsql/", resource_status => "/sys/kernel/debug/drbd/resources", scan_agents => "/usr/sbin/scancore-agents", diff --git a/tools/anvil-join-anvil b/tools/anvil-join-anvil index 17fa14ea..b6d9ad1b 100755 --- a/tools/anvil-join-anvil +++ b/tools/anvil-join-anvil @@ -298,7 +298,7 @@ sub configure_pacemaker my $both_online = 0; until($both_online) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'path::configs::corosync.conf' => $anvil->data->{path}{configs}{'corosync.conf'}, }}); if (-e $anvil->data->{path}{configs}{'corosync.conf'}) diff --git a/tools/striker-get-screenshots b/tools/striker-get-screenshots index 0805428e..28b8bc42 100755 --- a/tools/striker-get-screenshots +++
b/tools/striker-get-screenshots @@ -42,6 +42,9 @@ if ($anvil->data->{switches}{'job-uuid'}) $anvil->data->{job}{progress} = 1; +# Make sure the directory we write screenshots to exists and has the proper ownership and mode. +check_screenshot_directory($anvil); + # Which subnodes are up? $anvil->Database->get_hosts(); $anvil->Database->get_dr_links(); @@ -178,6 +181,29 @@ $anvil->nice_exit({exit_code => 0}); # Functions # ############################################################################################################# +sub check_screenshot_directory +{ + my ($anvil) = @_; + + foreach my $directory ($anvil->data->{path}{directories}{opt_alteeve}, $anvil->data->{path}{directories}{screenshots}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { directory => $directory }}); + $anvil->Storage->change_owner({ + debug => 2, + path => $directory, + user => "striker-ui-api", + group => "striker-ui-api", + }); + $anvil->Storage->change_mode({ + debug => 2, + path => $directory, + mode => "0755", + }); + } + + return(0); +} + sub remove_old_screenshots { my ($anvil) = @_; @@ -411,6 +437,14 @@ sub get_screenshots 'format' => "jpeg", }, }); + + # Change the ownership + $anvil->Storage->change_owner({ + debug => 2, + path => $jpg_file, + user => "striker-ui-api", + group => "striker-ui-api", + }); } # Convert to png @@ -438,6 +472,14 @@ sub get_screenshots 'format' => "png", }, }); + + # Change the ownership + $anvil->Storage->change_owner({ + debug => 2, + path => $png_file, + user => "striker-ui-api", + group => "striker-ui-api", + }); } # Delete the original PPM file? From 7545df1e555460a7f097c52a4bdad3c6585bcc4b Mon Sep 17 00:00:00 2001 From: digimer Date: Wed, 11 Oct 2023 22:22:06 -0400 Subject: [PATCH 09/10] Fixed a bug in which host runs an anvil-delete-server job. * Updated anvil-delete-server to use the new Server->locate method. 
This was done as the old Server->locate() was failing to find the server running on the peer when anvil-delete-server was running on the backup subnode. * Updated Server->locate() to search hosts for XML definition and DRBD configs so that it can record where the server is recorded to run, even if the server isn't running or defined at the time the locate ran. Signed-off-by: digimer --- Anvil/Tools/Server.pm | 94 ++++++++++++++++++++++++++++++++++++--- tools/anvil-delete-server | 80 ++++++++++++++++++--------------- 2 files changed, 132 insertions(+), 42 deletions(-) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index aa134fde..2fbd30f9 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -1241,6 +1241,8 @@ If a specific requested server is found, or is being asked to search for all ser * server_location::host::::server::::active_definition = * server_location::host::::server::::inactive_definition = * server_location::host::::server::::definition_diff = +* server_location::host::::server::::file_definition = +* server_location::host::::server::::drbd_config = If the target was not accessible, C<< access >> is set to C<< 0 >>. This is meant to allow telling the difference between "we know there's no servers on that host" versus "we don't know what's there because we couldn't access it". @@ -1259,6 +1261,8 @@ The C<< status >> can be: If there is a problem, C<< !!error!! >> is returned. If the server is found on at least one host, C<< 0 >> is returned. If the server is not located anywhere, C<< 1 >> is returned. +If the server has a replicated storage (DRBD) config and/or a definition file, whether the server is found running or not, will be recorded. This can be used to see if the server has been configured to run there or not. + The connection to the host and to the server(s) is cached, for your use; * server_location::host::::connection = @@ -1319,26 +1323,28 @@ sub locate # This will switch to '1' if we connect to libvirtd. 
$anvil->data->{server_location}{host}{$short_host_name}{access} = 0; - # What IP to use? + # What IP to use? Don't test access, it's too slow if there's several down hosts. my $target_ip = $anvil->Network->find_target_ip({ debug => $debug, host_uuid => $host_uuid, - networks => "bcn,ifn", # Reduced list to not slow things down with test_access - test_access => 1, + networks => "bcn,mn,sn,ifn", + test_access => 0, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { target_ip => $target_ip }}); if ($target_ip) { # Try to connect to libvirtd. - my $problem = $anvil->Server->connect_to_libvirt({ + $anvil->Server->connect_to_libvirt({ debug => $debug, target => $short_host_name, target_ip => $target_ip, server_name => $server_name, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }}); - if (not $problem) + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "libvirtd::${short_host_name}::connection" => $anvil->data->{libvirtd}{$short_host_name}{connection}, + }}); + if (ref($anvil->data->{libvirtd}{$short_host_name}{connection}) eq "Sys::Virt") { # We're connected! Collect the data on the requested server(s), if applicable. 
$anvil->data->{server_location}{host}{$short_host_name}{access} = 1; @@ -1352,6 +1358,7 @@ sub locate $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { connection_handle => $connection_handle }}); foreach my $this_server_name (sort {$a cmp $b} keys %{$anvil->data->{libvirtd}{$short_host_name}{server}}) { + next if (ref($anvil->data->{libvirtd}{$short_host_name}{server}{$server_name}{connection}) ne "Sys::Virt::Domain"); if (($server_name eq "all") or ($server_name eq $this_server_name)) { my $server_handle = $anvil->data->{libvirtd}{$short_host_name}{server}{$server_name}{connection}; @@ -1394,6 +1401,12 @@ sub locate $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { definition_diff => $definition_diff }}); } + if ($server_state eq "running") + { + $server_host = $short_host_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_host => $server_host }}); + } + # If it's running, record the host. $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{status} = $server_state; $anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{active_definition} = $active_definition; @@ -1407,6 +1420,75 @@ sub locate }}); } } + + # If we've connected to the host, see if the XML definition file + # and/or DRBD config file exist. + my $servers = []; + if ($server_name eq "all") + { + # Search for any server we can find. 
+ $anvil->Database->get_servers(); + foreach my $server_uuid (sort {$a cmp $b} keys %{$anvil->data->{servers}{server_uuid}}) + { + next if $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state} eq "DELETED"; + my $this_server_name = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_name}; + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + server_uuid => $server_uuid, + this_server_name => $this_server_name, + }}); + push @{$servers}, $this_server_name; + } + } + else + { + push @{$servers}, $server_name; + } + + foreach my $this_server_name (sort {$a cmp $b} @{$servers}) + { + # Look for the files for the specified server. + $anvil->data->{server_location}{host}{$short_host_name}{server}{$this_server_name}{file_definition} = ""; + $anvil->data->{server_location}{host}{$short_host_name}{server}{$this_server_name}{drbd_config} = ""; + + # See if there's a definition file and/or a DRBD + # config file on this host. + my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$this_server_name.".xml"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { definition_file => $definition_file }}); + + # Can I read the definition file? 
+ my $definition_body = $anvil->Storage->read_file({ + debug => $debug, + file => $definition_file, + target => $target_ip, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { definition_body => $definition_body }}); + + if (($definition_body) && ($definition_body ne "!!error!!")) + { + $anvil->data->{server_location}{host}{$short_host_name}{server}{$this_server_name}{file_definition} = $definition_body; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "server_location::host::${short_host_name}::server::${this_server_name}::file_definition" => $anvil->data->{server_location}{host}{$short_host_name}{server}{$this_server_name}{file_definition}, + }}); + } + + my $drbd_config_file = $anvil->data->{path}{directories}{drbd_resources}."/".$this_server_name.".res"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { drbd_config_file => $drbd_config_file }}); + + my $drbd_body = $anvil->Storage->read_file({ + debug => $debug, + file => $drbd_config_file, + target => $target_ip, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_body => $drbd_body }}); + if (($drbd_body) && ($drbd_body ne "!!error!!")) + { + $anvil->data->{server_location}{host}{$short_host_name}{server}{$this_server_name}{drbd_config} = $drbd_body; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "server_location::host::${short_host_name}::server::${this_server_name}::drbd_config" => $anvil->data->{server_location}{host}{$short_host_name}{server}{$this_server_name}{drbd_config}, + }}); + } + } } } } diff --git a/tools/anvil-delete-server b/tools/anvil-delete-server index e9fa2e03..4b7bdc89 100755 --- a/tools/anvil-delete-server +++ b/tools/anvil-delete-server @@ -774,58 +774,66 @@ sub save_job my $server_name = $anvil->data->{switches}{server_name}; my $server_uuid = 
$anvil->data->{switches}{server_uuid}; my $delete_uuid = $server_uuid; - push @{$hosts}, $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; - push @{$hosts}, $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; - if ($anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}) + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + anvil_uuid => $anvil_uuid, + server_name => $server_name, + server_uuid => $server_uuid, + delete_uuid => $delete_uuid, + }}); + + if ((not $server_name) && (exists $anvil->data->{servers}{server_uuid}{$server_uuid}) && ($anvil->data->{servers}{server_uuid}{$server_uuid}{server_name})) { - push @{$hosts}, $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + $server_name = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_name => $server_name }}); } - my $server_host = ""; - foreach my $host_uuid (@{$hosts}) + + # Find the server on hosts. + my $server_host_name = $anvil->Server->locate({ + debug => 2, + server_name => $server_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_host_name => $server_host_name }}); + + foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{server_location}{host}}) { - if ($host_uuid eq $anvil->Get->host_uuid) + my $host_uuid = $anvil->Database->get_host_uuid_from_string({string => $short_host_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + short_host_name => $short_host_name, + host_uuid => $host_uuid, + }}); + + my $exists = 0; + if (($anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{file_definition}) or ($anvil->data->{server_location}{host}{$short_host_name}{server}{$server_name}{drbd_config})) { - # This is us. 
- $anvil->Server->find({refresh => 0}); + $exists = 1; } - else + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'exists' => $exists }}); + + if (($exists) or + ($host_uuid eq $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}) or + ($host_uuid eq $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid})) { - # This is another machine. - my $target_ip = $anvil->Network->find_target_ip({host_uuid => $host_uuid}); - $anvil->Server->find({ - refresh => 0, - target => $target_ip, - password => $password, - }); + push @{$hosts}, $host_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_uuid => $host_uuid }}); } } - my $host_name = ""; - my $host_uuid = ""; - if (exists $anvil->data->{server}{location}{$server_name}) + # If the server was found to be running, the host will be returned. + my $server_host_uuid = ""; + if ($server_host_name) { - my $status = $anvil->data->{server}{location}{$server_name}{status}; - $host_name = $anvil->data->{server}{location}{$server_name}{host_name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - status => $status, - host_name => $host_name, - }}); - if ($status eq "running") - { - $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $host_name}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_uuid => $host_uuid }}); - } + $server_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $server_host_name}); } # Now, we'll do the delete, unless we see the server running elsewhere. 
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_uuid => $host_uuid }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_host_uuid => $server_host_uuid }}); my $job_host_uuid = ""; - if ($host_uuid) + if ($server_host_uuid) { - $job_host_uuid = $host_uuid; + $job_host_uuid = $server_host_uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_host_uuid => $job_host_uuid }}); - if ($host_uuid eq $anvil->Get->host_uuid) + if ($server_host_uuid eq $anvil->Get->host_uuid) { # Running here print $anvil->Words->string({key => "message_0216"})."\n"; @@ -833,7 +841,7 @@ sub save_job else { # Running on a peer. - print $anvil->Words->string({key => "message_0214", variables => { host_name => $host_name }})."\n"; + print $anvil->Words->string({key => "message_0214", variables => { host_name => $server_host_name }})."\n"; } } else From b3c067b0165d14d989b3d365d96fbeffb2455b17 Mon Sep 17 00:00:00 2001 From: digimer Date: Thu, 12 Oct 2023 01:01:31 -0400 Subject: [PATCH 10/10] Fixed a bug in anvil-manage-files where missing files weren't being downloaded. Signed-off-by: digimer --- Anvil/Tools/Database.pm | 2 +- Anvil/Tools/Storage.pm | 2 +- tools/anvil-manage-files | 14 ++++---------- tools/striker-file-manager | 2 ++ 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index dade1c1d..5e33a51e 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -3580,7 +3580,7 @@ FROM }}); } - # If this host is a node in an Anvil!, set the old 'file_location_anvil_uuid' to maintain + # If this host is an Anvil! subnode, set the old 'file_location_anvil_uuid' to maintain # backwards compatibility. 
if ((exists $anvil->data->{hosts}{host_uuid}{$file_location_host_uuid}) && ($anvil->data->{hosts}{host_uuid}{$file_location_host_uuid}{anvil_uuid})) diff --git a/Anvil/Tools/Storage.pm b/Anvil/Tools/Storage.pm index 40fc9d82..053e8e32 100644 --- a/Anvil/Tools/Storage.pm +++ b/Anvil/Tools/Storage.pm @@ -776,7 +776,7 @@ sub check_files } elsif ($file_location_ready) { - # File doesn't exist but is marked as read, mark it as not ready. + # File doesn't exist but is marked as ready, mark it as not ready. $anvil->Database->insert_or_update_file_locations({ debug => $debug, file_location_uuid => $file_location_uuid, diff --git a/tools/anvil-manage-files b/tools/anvil-manage-files index db915c3f..075ddb19 100755 --- a/tools/anvil-manage-files +++ b/tools/anvil-manage-files @@ -175,6 +175,9 @@ sub find_missing_files # What am I? This will impact how missing files are found. $anvil->Database->get_anvils(); + my $host_uuid = $anvil->Get->host_uuid(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_uuid => $host_uuid }}); + my $query = " SELECT file_uuid, @@ -237,16 +240,7 @@ ORDER BY else { # Check to see if we're supposed to have this file. - $anvil->Database->get_file_locations(); - my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }}); - - # Nothing to do if we're not in an Anvil! yet. - next if not $anvil_uuid; - - # Do we have a file_location_uuid? If not, there will be soon but nothing to do until - # then. 
- my $file_location_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{file_uuid}{$file_uuid}{file_location_uuid}; + my $file_location_uuid = $anvil->data->{file_locations}{host_uuid}{$host_uuid}{file_uuid}{$file_uuid}{file_location_uuid}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_location_uuid => $file_location_uuid }}); next if not $file_location_uuid; diff --git a/tools/striker-file-manager b/tools/striker-file-manager index aed0d50c..78256c41 100755 --- a/tools/striker-file-manager +++ b/tools/striker-file-manager @@ -2,6 +2,8 @@ # # This is the command line user interface for managing files on /mnt/shared/files on Strikers and made # available on Anvil! systems. +# +# NOTE: This program is incomplete! # use strict;