Merge pull request #376 from ClusterLabs/anvil-tools-dev

Anvil tools dev
main
Digimer 1 year ago committed by GitHub
commit 25b46058f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      Anvil/Tools.pm
  2. 121
      Anvil/Tools/Cluster.pm
  3. 4
      Anvil/Tools/DRBD.pm
  4. 5
      Anvil/Tools/Network.pm
  5. 2
      Anvil/Tools/Remote.pm
  6. 66
      Anvil/Tools/Server.pm
  7. 179
      Anvil/Tools/System.pm
  8. 3
      anvil.spec.in
  9. 10
      man/Makefile.am
  10. 6
      man/anvil-boot-server.8
  11. 36
      man/anvil-report-usage.8
  12. 29
      man/anvil-safe-start.8
  13. 45
      man/anvil-safe-stop.8
  14. 45
      man/anvil-shutdown-server.8
  15. 6
      man/anvil-update-system.8
  16. 6
      man/striker-collect-debug.8
  17. 6
      man/striker-update-cluster.8
  18. 95
      scancore-agents/scan-cluster/scan-cluster
  19. 1
      scancore-agents/scan-cluster/scan-cluster.xml
  20. 16
      scancore-agents/scan-server/scan-server
  21. 23
      share/words.xml
  22. 5
      tools/anvil-configure-host
  23. 227
      tools/anvil-daemon
  24. 69
      tools/anvil-delete-server
  25. 37
      tools/anvil-manage-power
  26. 7
      tools/anvil-manage-server-storage
  27. 213
      tools/anvil-report-usage
  28. 65
      tools/anvil-safe-start
  29. 146
      tools/anvil-safe-stop
  30. 148
      tools/anvil-shutdown-server
  31. 6
      tools/anvil-special-operations
  32. 340
      tools/anvil-update-system
  33. 11
      tools/scancore
  34. 70
      tools/striker-collect-debug
  35. 542
      tools/striker-update-cluster

@ -1092,6 +1092,7 @@ sub _set_paths
issue => "/etc/issue",
network_cache => "/tmp/network_cache.anvil",
passwd => "/etc/passwd",
reboot_cache => "/tmp/anvil.reboot-needed",
'redhat-release' => "/etc/redhat-release",
fences_unified_metadata => "/var/www/html/fences_unified_metadata.xml",
},
@ -1245,6 +1246,7 @@ sub _set_paths
nc => "/usr/bin/nc",
nmap => "/usr/bin/nmap",
nmcli => "/bin/nmcli",
nohup => "/usr/bin/nohup",
ocf_alteeve => "/usr/lib/ocf/resource.d/alteeve/server",
openssl => "/usr/bin/openssl",
'osinfo-query' => "/usr/bin/osinfo-query",

@ -632,125 +632,6 @@ ORDER BY
# Now loop through any attached DRs and add the VGs that are closest in sizes to the VGs we have in
# this Anvil! node.
$anvil->Database->get_dr_links({debug => 2});
# This was going to automatically add VGs from DR hosts to an Anvil's Storage Group, but given now
# that things can be linked in random ways, this doesn't seem to make sense anymore. So the code is
# (not complete but) preserved here in case we change our mind later.
=cut
foreach my $dr_link_host_name (sort {$a cmp $b} keys %{$anvil->data->{dr_links}{by_anvil_uuid}{$anvil_uuid}{dr_link_host_name}})
{
my $dr_link_uuid = $anvil->data->{dr_links}{by_anvil_uuid}{$anvil_uuid}{dr_link_host_name}{$dr_link_host_name}{dr_link_uuid};
my $dr_link_host_uuid = $anvil->Database->get_host_uuid_from_string({debug => $debug, string => $dr_link_host_name});
my $dr_link_short_host_name = $anvil->data->{hosts}{host_uuid}{$dr_link_host_uuid}{short_host_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:dr_link_host_name' => $dr_link_host_name,
's2:dr_link_host_uuid' => $dr_link_host_uuid,
's3:dr_link_short_host_name' => $dr_link_short_host_name,
's4:dr_link_uuid' => $dr_link_uuid,
}});
# What VGs do these DR hosts have, and are they in any storage groups yet?
my $query = "
SELECT
scan_lvm_vg_uuid,
scan_lvm_vg_name,
scan_lvm_vg_size,
scan_lvm_vg_internal_uuid
FROM
scan_lvm_vgs
WHERE
scan_lvm_vg_host_uuid = ".$anvil->Database->quote($dr_link_uuid)."
AND
scan_lvm_vg_name != 'DELETED'
ORDER BY
scan_lvm_vg_size ASC
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $scan_lvm_vg_uuid = $row->[0];
my $scan_lvm_vg_name = $row->[1];
my $scan_lvm_vg_size = $row->[2];
my $scan_lvm_vg_internal_uuid = $row->[3];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:scan_lvm_vg_uuid' => $scan_lvm_vg_uuid,
's2:scan_lvm_vg_name' => $scan_lvm_vg_name,
's3:scan_lvm_vg_size' => $scan_lvm_vg_size." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $scan_lvm_vg_size}).")",
's4:scan_lvm_vg_internal_uuid' => $scan_lvm_vg_internal_uuid,
}});
# Is this VG already in a storage group for this anvil! node?
my $vg_found = 0;
foreach my $storage_group_uuid (keys %{$anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}})
{
my $storage_group_name = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{group_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:storage_group_uuid' => $storage_group_uuid,
's2:storage_group_name' => $storage_group_name,
}});
foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{short_host_name}})
{
next if $this_host_uuid eq $dr_link_host_uuid;
my $storage_group_member_note = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$$this_host_uuid}{storage_group_member_note};
my $storage_group_member_vg_uuid = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$this_host_uuid}{vg_internal_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:this_host_uuid' => $this_host_uuid,
's2:storage_group_member_vg_uuid' => $storage_group_member_vg_uuid,
's3:storage_group_member_note' => $storage_group_member_note,
}});
next if $storage_group_member_note eq "DELETED";
if ($storage_group_member_vg_uuid eq $scan_lvm_vg_internal_uuid)
{
$vg_found = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { vg_found => $vg_found }});
}
}
}
if (not $vg_found)
{
# Find the smallest
my $query = "
SELECT
scan_lvm_vg_uuid,
scan_lvm_vg_name,
scan_lvm_vg_size,
scan_lvm_vg_internal_uuid
FROM
scan_lvm_vgs
WHERE
scan_lvm_vg_host_uuid = ".$anvil->Database->quote($dr_link_uuid)."
AND
scan_lvm_vg_name != 'DELETED'
ORDER BY
scan_lvm_vg_size ASC
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $scan_lvm_vg_uuid = $row->[0];
my $scan_lvm_vg_name = $row->[1];
my $scan_lvm_vg_size = $row->[2];
my $scan_lvm_vg_internal_uuid = $row->[3];
}
}
}
}
=cut
return(0);
}
@ -2441,7 +2322,7 @@ sub get_peers
=head2 get_primary_host_uuid
This takes an Anvil! UUID and returns with the node's host UUID that is currently the "primary" node. That is to say, which node has the most servers running on it, by allocated RAM. For example, if node 1 has two servers, each with 8 GiB of RAN and node 2 has one VM with 32 GiB of RAM, node 2 will be considered primary as it would take longest to migrate servers off.
This takes an Anvil! UUID and returns with the node's host UUID that is currently the "primary" node. That is to say, which node has the most servers running on it, by allocated RAM. For example, if node 1 has two servers, each with 8 GiB of RAM and node 2 has one VM with 32 GiB of RAM, node 2 will be considered primary as it would take longest to migrate servers off.
If all is equal, node 1 is considered primary. If only one node is a cluster member, it is considered primary. If neither node is up, an empty string is returned.

@ -2251,6 +2251,8 @@ sub get_status
if ($is_local)
{
# Try rebuilding the module.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0328"});
my $problem = $anvil->DRBD->_initialize_kmod({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
@ -2264,7 +2266,7 @@ sub get_status
}});
if ($output =~ /modprobe: FATAL: Module drbd not found/i)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "error_0415", variables => {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0415", variables => {
output => $output,
return_code => $anvil->data->{drbd}{status}{$host}{return_code},
}});

@ -1245,6 +1245,11 @@ sub find_access
return("!!error!!");
}
if (exists $anvil->data->{network_access})
{
delete $anvil->data->{network_access};
}
my $host_uuid = $anvil->Get->host_uuid;
my $short_host_name = $anvil->Get->short_host_name;
my $target_short_host_name = $anvil->data->{hosts}{host_uuid}{$target_host_uuid}{short_host_name};

@ -1176,7 +1176,7 @@ sub _check_known_hosts_for_target
if (not $known_hosts)
{
# Nope.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, key => "log_0163", variables => { file => $$known_hosts }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, key => "log_0163", variables => { file => $known_hosts }});
return($known_machine)
}

@ -2180,27 +2180,29 @@ sub shutdown_virsh
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::connections" => $anvil->data->{sys}{database}{connections} }});
if ($anvil->data->{sys}{database}{connections})
{
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { anvil_uuid => $anvil_uuid }});
$server_uuid = $anvil->Get->server_uuid_from_name({
debug => $debug,
server_name => $server,
anvil_uuid => $anvil_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_uuid => $server_uuid }});
if (($server_uuid) && ($server_uuid ne "!!error!!"))
if ($anvil->data->{sys}{database}{connections})
{
$anvil->Database->get_servers({debug => $debug});
if (exists $anvil->data->{servers}{server_uuid}{$server_uuid})
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid({debug => $debug});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { anvil_uuid => $anvil_uuid }});
$server_uuid = $anvil->Get->server_uuid_from_name({
debug => $debug,
server_name => $server,
anvil_uuid => $anvil_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_uuid => $server_uuid }});
if (($server_uuid) && ($server_uuid ne "!!error!!"))
{
my $old_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_state => $old_state }});
if ($old_state ne "in shutdown")
$anvil->Database->get_servers({debug => $debug});
if (exists $anvil->data->{servers}{server_uuid}{$server_uuid})
{
# Update it.
my $query = "
my $old_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_state => $old_state }});
if ($old_state ne "in shutdown")
{
# Update it.
my $query = "
UPDATE
servers
SET
@ -2209,8 +2211,9 @@ SET
WHERE
server_uuid = ".$anvil->Database->quote($server_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
}
}
}
@ -2273,16 +2276,18 @@ WHERE
# Mark it as stopped now. (if we have a server_uuid, we have a database connection)
if ($server_uuid)
{
$anvil->Database->get_servers({debug => $debug});
if (exists $anvil->data->{servers}{server_uuid}{$server_uuid})
if ($anvil->data->{sys}{database}{connections})
{
my $old_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_state => $old_state }});
if ($old_state ne "shut off")
$anvil->Database->get_servers({debug => $debug});
if (exists $anvil->data->{servers}{server_uuid}{$server_uuid})
{
# Update it.
my $query = "
my $old_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_state => $old_state }});
if ($old_state ne "shut off")
{
# Update it.
my $query = "
UPDATE
servers
SET
@ -2293,8 +2298,9 @@ SET
WHERE
server_uuid = ".$anvil->Database->quote($server_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
}
}
}

@ -46,6 +46,7 @@ my $THIS_FILE = "System.pm";
# stop_daemon
# stty_echo
# update_hosts
# wait_on_dnf
# _check_anvil_conf
# _load_firewalld_zones
# _load_specific_firewalld_zone
@ -4440,7 +4441,7 @@ sub reload_daemon
This sets, clears or checks if the local system needs to be restarted.
This returns C<< 1 >> if a reset is currently needed and C<< 0 >> if not.
This returns C<< 1 >> if a reset is currently needed and C<< 0 >> if not. In most cases, this is recorded in the database (variables -> variable_name = 'reboot::needed'). If there are no available databases, then the cache file '/tmp/anvil.reboot-needed' will be used, which will contain the digit '0' or '1'.
Parameters;
@ -4460,6 +4461,8 @@ sub reboot_needed
my $set = defined $parameter->{set} ? $parameter->{set} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { set => $set }});
my $cache_file = $anvil->data->{path}{data}{reboot_cache};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { cache_file => $cache_file }});
if (($set) or ($set eq "0"))
{
### TODO: stop other systems from using this database.
@ -4467,34 +4470,67 @@ sub reboot_needed
if ($set eq "1")
{
# Set
$anvil->Database->insert_or_update_variables({
debug => $debug,
file => $THIS_FILE,
line => __LINE__,
variable_name => "reboot::needed",
variable_value => "1",
variable_default => "0",
variable_description => "striker_0089",
variable_section => "system",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_table => "hosts",
});
if ($anvil->data->{sys}{database}{connections})
{
$anvil->Database->insert_or_update_variables({
debug => $debug,
file => $THIS_FILE,
line => __LINE__,
variable_name => "reboot::needed",
variable_value => "1",
variable_default => "0",
variable_description => "striker_0089",
variable_section => "system",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_table => "hosts",
});
}
else
{
# Record that a reboot is needed in a temp file.
my $failed = $anvil->Storage->write_file({
debug => $debug,
overwrite => 1,
file => $cache_file,
body => 1,
user => "root",
group => "root",
mode => "0644",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { failed => $failed }});
}
}
elsif ($set eq "0")
{
# Clear
$anvil->Database->insert_or_update_variables({
debug => $debug,
file => $THIS_FILE,
line => __LINE__,
variable_name => "reboot::needed",
variable_value => "0",
variable_default => "0",
variable_description => "striker_0089",
variable_section => "system",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_table => "hosts",
});
if ($anvil->data->{sys}{database}{connections})
{
$anvil->Database->insert_or_update_variables({
debug => $debug,
file => $THIS_FILE,
line => __LINE__,
variable_name => "reboot::needed",
variable_value => "0",
variable_default => "0",
variable_description => "striker_0089",
variable_section => "system",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_table => "hosts",
});
}
else
{
my $failed = $anvil->Storage->write_file({
debug => $debug,
overwrite => 1,
file => $cache_file,
body => 0,
user => "root",
group => "root",
mode => "0644",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { failed => $failed }});
}
}
else
{
@ -4504,19 +4540,32 @@ sub reboot_needed
}
}
my ($reboot_needed, $variable_uuid, $modified_date) = $anvil->Database->read_variable({
debug => $debug,
file => $THIS_FILE,
line => __LINE__,
variable_name => "reboot::needed",
variable_source_table => "hosts",
variable_source_uuid => $anvil->Get->host_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
reboot_needed => $reboot_needed,
variable_uuid => $variable_uuid,
modified_date => $modified_date,
}});
# Read from the cache file, if it exists.
my $reboot_needed = 0;
if (-e $cache_file)
{
$reboot_needed = $anvil->Storage->read_file({
debug => $debug,
file => $cache_file,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { reboot_needed => $reboot_needed }});
}
elsif ($anvil->data->{sys}{database}{connections})
{
($reboot_needed, my $variable_uuid, my $modified_date) = $anvil->Database->read_variable({
debug => $debug,
file => $THIS_FILE,
line => __LINE__,
variable_name => "reboot::needed",
variable_source_table => "hosts",
variable_source_uuid => $anvil->Get->host_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
reboot_needed => $reboot_needed,
variable_uuid => $variable_uuid,
modified_date => $modified_date,
}});
}
if ($reboot_needed eq "")
{
@ -5341,6 +5390,60 @@ sub update_hosts
return(0);
}
=head2 wait_on_dnf
This method checks to see if 'dnf' is running and, if so, won't return until it finishes. This is useful when holding off doing certain tasks, like building kernel modules, while an OS update is under way.
This method takes no parameters.
=cut
sub wait_on_dnf
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "System->wait_on_dnf()" }});
	
	# Blocks until no 'dnf' processes remain. The first status message is logged
	# immediately (next_log starts in the past), then repeats are throttled to
	# one per minute while we poll every ten seconds.
	my $next_log = time - 1;
	my $waiting  = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		next_log => $next_log, 
		waiting  => $waiting, 
	}});
	while ($waiting)
	{
		# Count the 'dnf' processes currently running.
		my $pids          = $anvil->System->pids({program_name => $anvil->data->{path}{exe}{dnf}, debug => $debug});
		my $dnf_instances = scalar @{$pids};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { dnf_instances => $dnf_instances }});
		if (not $dnf_instances)
		{
			# Nothing running, we're done waiting.
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { waiting => $waiting }});
			next;
		}
		if (time > $next_log)
		{
			# Tell the user which PIDs we're waiting on, at most once a minute.
			my $say_pids = join(", ", @{$pids});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0325", variables => { pids => $say_pids }});
			$next_log = time + 60;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { next_log => $next_log }});
		}
		sleep 10;
	}
	return(0);
}
# =head3
#
# Private Functions;

@ -256,7 +256,8 @@ getent passwd %{suiapi} >/dev/null \
--user-group \
%{suiapi}
if [ $1 -gt 1 ]; then # >1=Upgrade
# Check to see if we're updating.
if grep -q apache /etc/passwd; then
# Disable and stop apache to free the port.
systemctl disable --now httpd.service
# Transfer files owned by apache to Striker UI API user.

@ -17,15 +17,23 @@ dist_man8_MANS = \
anvil-manage-alerts.8 \
anvil-manage-dr.8 \
anvil-manage-files.8 \
anvil-manage-keys.1 \
anvil-manage-power.8 \
anvil-manage-server.8 \
anvil-manage-server-storage.8 \
anvil-manage-storage-groups.8 \
anvil-report-usage.8 \
anvil-safe-start.8 \
anvil-safe-stop.8 \
anvil-shutdown-server.8 \
anvil-special-operations.8 \
anvil-update-system.8 \
anvil-watch-drbd.8 \
scancore.8 \
striker-check-machines.8 \
striker-collect-debug.8 \
striker-initialize-host.8 \
striker-update-cluster.8

@ -23,10 +23,10 @@ When logging, record sensitive data, like passwords.
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-job-uuid\fR <uuid>
\fB\-\-job\-uuid\fR <uuid>
This is set to the job UUID when the request to boot is coming from a database job. When set, the referenced job will be updated and marked as complete / failed when the run completes.
.TP
\fB\-\-no-wait\fR
\fB\-\-no\-wait\fR
This controls whether the request to boot the server waits for the server to actually boot up before returning. Normally, the program will check every couple of seconds to see if the server has actually booted before returning. Setting this tells the program to return as soon as the request to boot the server has been passed on to the resource manager.
.TP
\fB\-\-server\fR <all|name|uuid>
@ -34,7 +34,7 @@ This is either 'all', the name, or server UUID (as set in the definition XML) of
.TP
When set to 'all', all servers assigned to the local sub-cluster are booted. Servers on other Anvil! nodes are not started.
.TP
\fB\-\-server-uuid\fR <uuid>
\fB\-\-server\-uuid\fR <uuid>
This is the server UUID of the server to boot. Generally this isn't needed, except when two servers somehow share the same name. This should not be possible, but this option exists in case it happens anyway.
.TP
\fB\-\-wait\fR

@ -0,0 +1,36 @@
.\" Manpage for the Anvil! tool to report the usage of servers and Anvil! nodes
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-report-usage "8" "July 22 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-report-usage \- This program reports the current resource usage of servers and the available resources remaining on Anvil! nodes
.SH SYNOPSIS
.B anvil-report-usage
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program displays the resource utilization of servers and the resources available (used and free) on Anvil! nodes.
.TP
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-detailed\fR
.TP
This displays additional information about the resources used by servers on the node. This only matters for human-readable display, when using '\fB\-\-machine\fR', all data is reported.
.TP
\fB\-\-machine\fR
.TP
Outputs the data in a machine-parsable format
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -0,0 +1,29 @@
.\" Manpage for the Anvil! tool to safely start an Anvil! node's subnode.
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-safe-start "8" "July 22 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-safe-start \- This program safely joins an Anvil! subnode to a node.
.SH SYNOPSIS
.B anvil-safe-start
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program will safely join an Anvil! subnode to an Anvil! node. If both nodes are starting, it will communicate with the peer, once available. This includes booting hosted servers.
.TP
NOTE: This tool runs at boot (or not) via the 'anvil-safe-start.service' systemd daemon.
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
NOTE: This tool takes no specific commands.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -0,0 +1,45 @@
.\" Manpage for the Anvil! safely stopping Anvil! node hosts
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-safe-stop "8" "July 22 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-safe-stop \- This program safely stops a subnode in an Anvil! node, and DR hosts
.SH SYNOPSIS
.B anvil-safe-stop
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program will safely withdraw a subnode from an Anvil! node, and safely stop DR hosts. Optionally, it can also power off the machine.
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-no\-db\fR
.TP
This tells this program to run without connecting to the Striker databases. This should only be used if the Strikers are not available (either they're off, or they've been updated and this host hasn't been, and can't use them until this host is also updated).
.TP
NOTE: This is generally only used by 'striker-update-cluster'.
.TP
\fB\-\-poweroff\fR, \fB\-\-power\-off\fR
.TP
By default, the host will remain powered on when this program exits. Using this switch will have the host power off once the host is safely stopped.
.TP
\fB\-\-stop\-reason\fR <user, power, thermal>
.TP
Optionally used to set 'system::stop_reason' reason for this host. Valid values are 'user' (default), 'power' and 'thermal'. If set to 'user', ScanCore will not turn this host back on. If 'power', then ScanCore will reboot the host once the power under the host looks safe again. If thermal, then ScanCore will reboot the host once temperatures are back into safe levels.
.TP
\fB\-\-stop\-servers\fR
.TP
By default, on Anvil! sub-nodes, any servers running on this host will be migrated to the peer subnode. If the peer isn't available, this will refuse to stop. Using this switch will instead tell the system to stop all servers running on this host.
.TP
NOTE: On DR hosts, any running servers are always stopped.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -0,0 +1,45 @@
.\" Manpage for the Anvil! server shutdown tool
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-shutdown-server "8" "July 20 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-shutdown-server \- This program shuts down servers hosted on the Anvil! cluster.
.SH SYNOPSIS
.B anvil-shutdown-server
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program shuts down a server that is running on an Anvil! node or DR host. It can optionally stop all servers.
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-no\-db\fR
.TP
This tells the program to run without connecting to any databases. This is used mainly when the host is being taken down as part of a cluster-wise upgrade.
.TP
\fB\-\-no\-wait\fR
.TP
This tells the program to call the shut down, but not wait for the server to actually stop. By default, when shutting down one specific server, this program will wait for the server to be off before it returns.
.TP
\fB\-\-server\fR {<name>,all}
.TP
This is the name of the server to shut down. Optionally, this can be 'all' to shut down all servers on this host.
.TP
\fB\-\-server\-uuid\fR <uuid>
.TP
This is the server UUID of the server to shut down. NOTE: This can not be used with \fB\-\-no\-db\fR.
.TP
\fB\-\-wait\fR
.TP
This tells the program to wait for the server(s) to stop before returning. By default, when '\fB\-\-server all\fR' is used, the shutdown will NOT wait. This makes the shutdowns sequential.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -29,6 +29,12 @@ Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a s
.TP
This will force the dnf cache to be cleared before the OS update is started. This slows the update down a bit, but ensures the latest updates are installed.
.TP
\fB\-\-no\-db\fR
.TP
This tells the update tool to run without a database connection. This is needed if the Striker dashboards are already updated, and the local system may no longer be able to talk to them.
.TP
NOTE: After the OS update is complete, an attempt will be made to connect to the database(s). This allows for registering a request to reboot if needed.
.TP
\fB\-\-no\-reboot\fR
.TP
If the kernel is updated, the system will normally be rebooted. This switch prevents the reboot from occurring.

@ -1,10 +1,10 @@
.\" Manpage for the Anvil! machine power and access reporting tool.
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH striker-collect-data "8" "July 04 2023" "Anvil! Intelligent Availability™ Platform"
.TH striker-collect-debug "8" "July 04 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
striker-collect-data \- This program collects data needed to help diagnose problems with an Anvil! system.
striker-collect-debug \- This program collects data needed to help diagnose problems with an Anvil! system.
.SH SYNOPSIS
.B striker-collect-data
.B striker-collect-debug
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program collects database data, logs, config files and other information needed to help diagnose problems with the Anvil! platform. By default, this collects all data from all accessible machines.

@ -54,6 +54,12 @@ See \fB\-\-reboot\fR for rebooting if anything is updated.
Normally, the system will only reboot if the kernel is updated. If this is used, and if any packages are updated, then a reboot will be performed. This is recommended in most cases.
.TP
Must be used with \fB\-\-reboot\-self\fR to reboot the local system. Otherwise, it is passed along to target machines via their anvil-update-system calls.
.TP
\fB\-\-timeout\fR <seconds, Nm, Nh>
.TP
When given, if a system update doesn't complete in this amount of time, error out and abort the update. By default, updates will wait for 24 hours.
.TP
If this is set to an integer, it is treated as a number of seconds. If this ends in 'm' or 'h', then the preceding number is treated as a number of minutes or hours, respectively.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.

@ -102,6 +102,9 @@ check_fence_delay($anvil);
# Check for failed resources or resources that need updates
check_resources($anvil);
# Check for stale data in the CIB and clean up any that are found
cib_cleanup($anvil);
# Shut down.
$anvil->ScanCore->agent_shutdown({agent => $THIS_FILE});
@ -110,6 +113,98 @@ $anvil->ScanCore->agent_shutdown({agent => $THIS_FILE});
# Functions #
#############################################################################################################
# Clears stale 'drbd-fenced_<server>' transient attributes from this node's CIB entry.
# An attribute is stale when the server it names no longer exists in the cluster
# configuration. Returns 0 in all cases (including when not an active cluster member).
sub cib_cleanup
{
	my ($anvil) = @_;
	
	# Refresh our view of the CIB. A problem means we're not in the cluster, so
	# there is nothing for us to clean up.
	my $problem = $anvil->Cluster->parse_cib({debug => 2});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	return(0) if $problem;
	
	# Log the DRBD fence rule recorded for each server still defined in the cluster.
	foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_name => $server_name }});
		next if not exists $anvil->data->{cib}{parsed}{data}{server}{$server_name}{drbd_fence_rule}{'exists'};
		
		my $fence_rule = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{drbd_fence_rule};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			server_name => $server_name,
			attribute   => $fence_rule->{attribute},
			operation   => $fence_rule->{operation},
			value       => $fence_rule->{value},
		}});
	}
	
	# Resolve this host's pacemaker node ID and node name by matching either our
	# short or full host name against the parsed node list.
	my $host_name       = $anvil->Get->host_name;
	my $short_host_name = $anvil->Get->short_host_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_name       => $host_name,
		short_host_name => $short_host_name,
	}});
	
	my $node_id   = "";
	my $node_name = "";
	NODE: foreach my $node (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{node}})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node => $node }});
		next NODE unless (($node eq $short_host_name) or ($node eq $host_name));
		
		$node_id   = $anvil->data->{cib}{parsed}{data}{node}{$node}{id};
		$node_name = $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{uname};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			node_id   => $node_id,
			node_name => $node_name,
		}});
		last NODE;
	}
	
	# If we couldn't find ourselves in the node list, there's nothing we can clean.
	return(0) if $node_id eq "";
	
	# Walk this node's transient attributes, removing any 'drbd-fenced_<server>'
	# entries whose server is no longer in the cluster configuration.
	foreach my $attribute_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { attribute_id => $attribute_id }});
		next if $attribute_id !~ /^drbd-fenced_(.*)$/;
		
		my $fenced_server = $1;
		my $state         = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{$attribute_id};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			server_name => $fenced_server,
			'state'     => $state,
		}});
		next if exists $anvil->data->{cib}{parsed}{data}{server}{$fenced_server};
		
		# The server is gone, so this attribute is stale. Clear it via 'pcs' by
		# setting it to an empty value.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "scan_cluster_log_0012", variables => { attribute => $attribute_id }});
		my $shell_call = $anvil->data->{path}{exe}{pcs}." node attribute ".$node_name." ".$attribute_id."=";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});
	}
	
	return(0);
}
# This looks for failed resource and, if found, tries to recover them.
sub check_resources
{

@ -53,6 +53,7 @@ In Maintenance Mode: ..... [#!variable!maintenance_mode!#]
<key name="scan_cluster_log_0009">The server was found to be running, but not here (or this node is not fully in the cluster). NOT attempting recovery yet.</key>
<key name="scan_cluster_log_0010">Attempting recovery now...</key>
<key name="scan_cluster_log_0011">Checking to see if the server has recovered yet...</key>
<key name="scan_cluster_log_0012">Found the stale DRBD fenced attribute: [#!variable!attribute!#], removing it.</key>
<!-- Message entries (usually meant to be alerts) -->
<key name="scan_cluster_message_0001"></key>

@ -885,10 +885,10 @@ DELETED - Marks a server as no longer existing
# config change.
my $variables = {
server => $server_name,
old_ram_in_use_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_in_use}),
old_ram_in_use_bytes => $anvil->Convert->add_commas({number => $server_ram_in_use}),
new_ram_in_use_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $old_server_ram_in_use}),
new_ram_in_use_bytes => $anvil->Convert->add_commas({number => $old_server_ram_in_use}),
old_ram_in_use_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $old_server_ram_in_use}),
old_ram_in_use_bytes => $anvil->Convert->add_commas({number => $old_server_ram_in_use}),
new_ram_in_use_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram_in_use}),
new_ram_in_use_bytes => $anvil->Convert->add_commas({number => $server_ram_in_use}),
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_server_alert_0013", variables => $variables});
$anvil->Alert->register({alert_level => "notice", message => "scan_server_alert_0013", variables => $variables, set_by => $THIS_FILE});
@ -902,10 +902,10 @@ DELETED - Marks a server as no longer existing
my $say_ram = $server_configured_ram eq $server_ram_in_use ? "scan_server_alert_0014" : "scan_server_alert_0015";
my $variables = {
server => $server_name,
old_configured_ram_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $server_configured_ram}),
old_configured_ram_bytes => $anvil->Convert->add_commas({number => $server_configured_ram}),
new_configured_ram_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $old_server_configured_ram}),
new_configured_ram_bytes => $anvil->Convert->add_commas({number => $old_server_configured_ram}),
old_configured_ram_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $old_server_configured_ram}),
old_configured_ram_bytes => $anvil->Convert->add_commas({number => $old_server_configured_ram}),
new_configured_ram_short => $anvil->Convert->bytes_to_human_readable({'bytes' => $server_configured_ram}),
new_configured_ram_bytes => $anvil->Convert->add_commas({number => $server_configured_ram}),
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => $say_ram, variables => $variables});
$anvil->Alert->register({alert_level => "notice", message => $say_ram, variables => $variables, set_by => $THIS_FILE});

@ -366,12 +366,12 @@ The attempt to start the cluster appears to have failed. The return code '0' was
<key name="error_0257"><![CDATA[No server specified to boot. Please use '--server <name|all>' or '--server-uuid <UUID>.]]></key>
<key name="error_0258">This host is not a node or DR, unable to boot servers.</key>
<key name="error_0259">The definition file: [#!variable!definition_file!#] doesn't exist, unable to boot the server.</key>
<key name="error_0260">This host is not in an Anvil! system, aborting.</key>
<key name="error_0260">This subnode is not in an Anvil! node yet, aborting.</key>
<key name="error_0261">The definition file: [#!variable!definition_file!#] exists, but the server: [#!variable!server!#] does not appear to be in the cluster. Unable to boot it.</key>
<key name="error_0262">The server: [#!variable!server!#] status is: [#!variable!status!#]. We can only boot servers that are off, not booting it.</key>
<key name="error_0263"><![CDATA[No server specified to shut down. Please use '--server <name|all>' or '--server-uuid <UUID>.]]></key>
<key name="error_0264">This host is not a node or DR, unable to shut down servers.</key>
<key name="error_0265">This feature isn't enabled on DR hosts yet.</key>
<key name="error_0265">Specifying a server to shutdown using a UUID is not available when there are no DB connections.</key>
<key name="error_0266">The server: [#!variable!server!#] does not appear to be in the cluster. Unable to shut it down.</key>
<key name="error_0267">The server: [#!variable!server!#] failed to boot. The reason why should be in the logs.</key>
<key name="error_0268">The server: [#!variable!server!#] failed to shut down. The reason why should be in the logs.</key>
@ -1562,7 +1562,7 @@ Note: This is a permanent action! If you protect this server again later, a full
<key name="job_0467">Update the base operating system.</key>
<key name="job_0468">This uses 'dnf' to do an OS update on the host. If this is run on a node, 'anvil-safe-stop' will be called to withdraw the subnode from the node's cluster. If the peer subnode is also offline, hosted servers will be shut down.</key>
<key name="job_0469">Update beginning. Verifying all known machines are accessible...</key>
<key name="job_0470"></key>
<key name="job_0470">This is a DR host, no migration possible.</key>
<!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key>
@ -2254,7 +2254,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0595">Updated the lvm.conf file to add the filter: [#!variable!filter!#] to prevent LVM from seeing the DRBD devices as LVM devices.</key>
<key name="log_0596">The host: [#!variable!host_name!#] last updated the database: [#!variable!difference!#] seconds ago, skipping power checks.</key>
<key name="log_0597">The host: [#!variable!host_name!#] has no entries in the 'updated' table, so ScanCore has likely never run. Skipping this host for now.</key>
<key name="log_0598">This host is not a node, this program isn't designed to run here.</key>
<key name="log_0598">This host is not an Anvil! sub node, this program isn't designed to run here.</key>
<key name="log_0599">Enabled 'anvil-safe-start' locally on this node.</key>
<key name="log_0600">Enabled 'anvil-safe-start' on both nodes in this Anvil! system.</key>
<key name="log_0601">Disabled 'anvil-safe-start' locally on this node.</key>
@ -2407,6 +2407,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0740">Running the scan-agent: [#!variable!agent!#] now to ensure that the database has an updated view of resources.</key>
<key name="log_0741">I was about to start: [#!variable!command!#] with the job UUID: [#!variable!this_job_uuid!#]. However, another job using the same command with the job UUID: [#!variable!other_job_uuid!#]. To avoid race conditions, only one process with a given command is run at the same time.</key>
<key name="log_0742">The job with the command: [#!variable!command!#] and job UUID: [#!variable!job_uuid!#] is restarting.</key>
<key name="log_0743">Will run without connecting to the databases. Some features will be unavailable.</key>
<key name="log_0744">A cached request to reboot this host was found (likely from a --no-db update). Registering a job to reboot now!</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -2741,7 +2743,7 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty
<key name="message_0230">The 'anvil-safe-start' tool is disabled on this node and enabled on the peer.</key>
<key name="message_0231">The 'anvil-safe-start' tool is disabled, exiting. Use '--force' to run anyway.</key>
<key name="message_0232">The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding.</key>
<key name="message_0233">It appears that another instance of 'anvil-safe-start' is already runing. Please wait for it to complete (or kill it manually if needed).</key>
<key name="message_0233">It appears that another instance of: [#!variable!program!#] is already running. Please wait for it to complete (or kill it manually if needed).</key>
<key name="message_0234">Preparing to rename a server.</key>
<key name="message_0235">Preparing to rename stop this node.</key>
<key name="message_0236">This records how long it took to migrate a given server. The average of the last five migrations is used to guess how long future migrations will take.</key>
@ -2920,6 +2922,12 @@ Proceed? [y/N]</key>
<key name="message_0321">Removing the old drbd-kmod RPMs now.</key>
<key name="message_0322">Installing the latest DRBD kmod RPM now.</key>
<key name="message_0323">Retrying the OS update now.</key>
<key name="message_0324">Update almost complete. Picked this job up after a '--no-db' run, and now we have database access again.</key>
<key name="message_0325">[ Note ] - It looks like 'dnf' (pid(s): [#!variable!pids!#]) is running, holding our start up until it's done (in case the system is being updated now).</key>
<key name="message_0326">This daemon just started. Holding off starting jobs for another: [#!variable!will_start_in!#] second(s).</key>
<key name="message_0327">[ Note ] - It looks like 'anvil-version-changes' (pid(s): [#!variable!pids!#]) is running, holding off on power action until it's done (in case the system is being updated now or kernel modules are being built).</key>
<key name="message_0328">[ Note ] - The DRBD (replicated storage) kernel module appears to not exist. This is normal after an OS update, will try building the kernel module now. Please be patient.</key>
<key name="message_0329">[ Note ] - Deleting the old drbd fenced attribute: [#!variable!attribute!#] for the node: [#!variable!node_name!#] (ID: [#!variable!node_id!#]) from the CIB.</key>
<!-- Translate names (protocols, etc) -->
<key name="name_0001">Normal Password</key> <!-- none in mail-server -->
@ -3266,6 +3274,11 @@ If you are comfortable that the target has changed for a known reason, you can s
<key name="striker_0299">Migration Network link #!variable!number!#</key>
<key name="striker_0300">This is where you configure the optional network dedicated to RAM-copy during live migrations.</key>
<key name="striker_0301">This puts a temporary hold on a DRBD minor number or TCP port so that it isn't used again in the time between when it was queried as the next free number, and before it can be used.</key>
<key name="striker_0302">This indicates when, in unix time, the database was last aged-out.</key>
<key name="striker_0303">This indicates when, in unix time, the database was last archived.</key>
<key name="striker_0304">This indicates when, in unix time, the local install target data was updated.</key>
<key name="striker_0305">This indicates when, in unix time, the OUI data was last updated. The OUI data is a list of MAC address prefixes and which companies they've been assigned to.</key>
<key name="striker_0306">This indicates when, in unix time, the network was last scanned. This is done to determine what IPs are used by servers on the Anvil! cluster, and to try to identify foundation pack devices on the network. These scans are simple ping sweeps used to get the MAC addresses of devices with IPs.</key>
<!-- These are generally units and appended to numbers -->
<key name="suffix_0001">#!variable!number!#/sec</key>

@ -1687,7 +1687,10 @@ AND
{
# To make logs more sensible, we'll call 'problem' as 'out_of_cluster'.
my ($out_of_cluster) = $anvil->Cluster->parse_cib();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { out_of_cluster => $out_of_cluster }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
out_of_cluster => $out_of_cluster,
"cib::parsed::local::ready" => $anvil->data->{cib}{parsed}{'local'}{ready},
}});
if ((not $out_of_cluster) && ($anvil->data->{cib}{parsed}{'local'}{ready}))
{
# We're in a cluster, abort.

@ -67,6 +67,9 @@ if (($< != 0) && ($> != 0))
# If, so some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();
# If dnf is running, hold.
$anvil->System->wait_on_dnf();
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect({
@ -117,7 +120,13 @@ if (not $anvil->data->{sys}{database}{connections})
}
# Read switches
$anvil->Get->switches({list => ["clear-mapping", "refresh-json", "run-once", "main-loop-only", "no-start", "startup-only"], man => $THIS_FILE});
$anvil->Get->switches({list => [
"clear-mapping",
"refresh-json",
"run-once",
"main-loop-only",
"no-start",
"startup-only"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
@ -136,6 +145,10 @@ if ($anvil->data->{switches}{'refresh-json'})
# This is used to track initial checkes / repairs of network issues.
$anvil->data->{sys}{network}{initial_checks} = 0;
# We use this to delay starting jobs for a short time.
our $start_time = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }});
# There are some things we only want to run on (re)start and don't need to always run.
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};
@ -668,54 +681,158 @@ sub handle_periodic_tasks
### recently enough.
if ($host_type eq "striker")
{
# Age out old data. This takes up to a minute.
$anvil->Database->_age_out_data();
# Archive old data
$anvil->Database->archive_database();
### TODO: This is here only to handle the period of time where we disabled postgres
### on boot. This should be removed sometime after 2022-08-01
$anvil->System->enable_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
#$anvil->System->enable_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
# Record a job, don't call it directly. It takes too long to run.
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches,
job_data => "",
job_name => "install-target::refresh",
job_title => "job_0015",
job_description => "job_0017",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { job_uuid => $job_uuid }});
my $host_uuid = $anvil->Get->host_uuid();
my ($last_age_out, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::aged-out"});
my $time_since_last_age_out = $last_age_out =~ /^\d+$/ ? time - $last_age_out : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:host_uuid' => $host_uuid,
's2:last_age_out' => $last_age_out,
's3:time_since_last_age_out' => $time_since_last_age_out,
}});
# Update the OUI data.
($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches,
job_data => "",
job_name => "oui-data::refresh",
job_title => "job_0064",
job_description => "job_0065",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# Run an age-out?
if ($time_since_last_age_out > 86400)
{
# Age out old data. This takes up to a minute.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "database::".$host_uuid."::aged-out",
variable_value => time,
variable_default => "0",
variable_description => "striker_0302",
variable_section => "database",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
$anvil->Database->_age_out_data();
}
# Scan the networks
($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches,
job_data => "",
job_name => "scan-network::refresh",
job_title => "job_0066",
job_description => "job_0067",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# Run an archive?
my ($last_archive, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::archived"});
my $time_since_last_archive = $last_archive =~ /^\d+$/ ? time - $last_archive : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_archive' => $last_archive,
's2:time_since_last_archive' => $time_since_last_archive,
}});
if ($time_since_last_archive > 86400)
{
# Archive old data
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "database::".$host_uuid."::archived",
variable_value => time,
variable_default => "0",
variable_description => "striker_0303",
variable_section => "database",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
$anvil->Database->archive_database();
}
# Run the install target update?
my ($last_mit, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target"});
my $time_since_last_mit = $last_mit =~ /^\d+$/ ? time - $last_mit : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_mit' => $last_mit,
's2:time_since_last_mit' => $time_since_last_mit,
}});
if ($time_since_last_mit > 86400)
{
# Update the local install target data.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target",
variable_value => time,
variable_default => "0",
variable_description => "striker_0304",
variable_section => "jobs",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches,
job_data => "",
job_name => "install-target::refresh",
job_title => "job_0015",
job_description => "job_0017",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { job_uuid => $job_uuid }});
}
# Update the OUI data?
my ($last_parse_oui, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-parse-oui"});
my $time_since_last_parse_oui = $last_parse_oui =~ /^\d+$/ ? time - $last_parse_oui : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_parse_oui' => $last_parse_oui,
's2:time_since_last_parse_oui' => $time_since_last_parse_oui,
}});
if ($time_since_last_parse_oui > 86400)
{
# Yup.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "jobs::last-ran::striker-parse-oui",
variable_value => time,
variable_default => "0",
variable_description => "striker_0305",
variable_section => "jobs",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches,
job_data => "",
job_name => "oui-data::refresh",
job_title => "job_0064",
job_description => "job_0065",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
}
# Scan the network? This is done at most once a day to map IPs/MACs of servers
# and foundation pack devices via a simple ping sweep.
my ($last_network_scan, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-scan-network"});
my $time_since_last_network_scan = $last_network_scan =~ /^\d+$/ ? time - $last_network_scan : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	's1:last_network_scan'            => $last_network_scan,
	's2:time_since_last_network_scan' => $time_since_last_network_scan,
}});
# NOTE: This previously tested '$time_since_last_parse_oui' (copy/paste from the
#       OUI section above), so the network-scan timer was never actually consulted.
if ($time_since_last_network_scan > 86400)
{
	# It's been a day or more; record the run time, then queue the scan job.
	my $variable_uuid = $anvil->Database->insert_or_update_variables({
		variable_name        => "jobs::last-ran::striker-scan-network",
		variable_value       => time,
		variable_default     => "0",
		variable_description => "striker_0306",
		variable_section     => "jobs",
		variable_source_uuid => "NULL",
		variable_source_table => "",
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
	
	# Record a job rather than calling the scan directly; it takes too long to
	# run inside the daemon's loop.
	my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
		file            => $THIS_FILE,
		line            => __LINE__,
		job_command     => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches,
		job_data        => "",
		job_name        => "scan-network::refresh",
		job_title       => "job_0066",
		job_description => "job_0067",
		job_progress    => 0,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
}
}
# Update the next check time.
@ -1402,12 +1519,32 @@ sub keep_running
sub run_jobs
{
my ($anvil, $startup) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { startup => $startup }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup => $startup }});
# Don't start jobs for 30 seconds after startup.
if (not $startup)
{
my $time_since_start = time - $start_time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
time_since_start => $time_since_start,
start_time => $start_time,
}});
if ($time_since_start < 60)
{
# Log that we'll start jobs in X seconds.
my $will_start_in = 60 - $time_since_start;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "message_0326", variables => { will_start_in => $will_start_in }});
return(0);
}
}
# This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's
# changed on disk.
$anvil->data->{sys}{jobs_running} = 0;
# If we're not configured, we won't hold on starting jobs
my $configured = $anvil->System->check_if_configured;
# We'll also update the jobs.json file.
my $jobs_file = "{\"jobs\":[\n";
@ -1482,7 +1619,7 @@ sub run_jobs
}
# If this is a start-up call, only start jobs whose status is 'anvil_startup'.
if (($startup) && ($job_status ne "anvil_startup"))
if (($startup) && ($configured) && ($job_status ne "anvil_startup"))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => {
job_uuid => $job_uuid,

@ -260,6 +260,9 @@ WHERE
}});
}
# Make sure drbd-fence attributes are deleted for this server.
clear_cib($anvil);
# We're done
$anvil->Job->update_progress({
progress => 100,
@ -270,6 +273,72 @@ WHERE
return(0);
}
# clear_cib: Removes any 'drbd-fenced_<server>' transient attributes left in the CIB
# for the server being deleted. These attributes are written by the DRBD fence handler
# and, if left behind after deletion, would linger as stale state in pacemaker.
# Reads the server to delete from 'job::server_uuid' in $anvil->data.
# Returns 0 in all cases (including when this host is not an active cluster member).
sub clear_cib
{
	my ($anvil) = @_;
	
	# Refresh our view of the CIB. A problem means we're not in the cluster, so
	# there is nothing to clean up here.
	my $problem = $anvil->Cluster->parse_cib({debug => 2});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	if ($problem)
	{
		# Not in a cluster
		return(0);
	}
	
	my $server_uuid     = $anvil->data->{job}{server_uuid};
	my $server_name     = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_name};
	my $host_name       = $anvil->Get->host_name;
	my $short_host_name = $anvil->Get->short_host_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		server_uuid     => $server_uuid,
		server_name     => $server_name,
		host_name       => $host_name,
		short_host_name => $short_host_name,
	}});
	
	# Check every node's transient attributes, not just our own, as the fence
	# attribute could have been recorded against either subnode.
	foreach my $node_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{node_state}})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node_id => $node_id }});
		foreach my $attribute_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { attribute_id => $attribute_id }});
			if ($attribute_id =~ /^drbd-fenced_(.*)$/)
			{
				my $this_server_name = $1;
				my $state            = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{$attribute_id};
				# Fix: this previously logged '$server_name' (the deletion
				# target) under 'this_server_name', hiding which server the
				# attribute actually belonged to.
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					this_server_name => $this_server_name,
					'state'          => $state,
				}});
				if ($this_server_name eq $server_name)
				{
					# This attribute belongs to the server being deleted;
					# clear it via 'pcs' by setting it to an empty value.
					my $node_name = $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{uname};
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0329", variables => {
						attribute => $attribute_id,
						node_name => $node_name,
						node_id   => $node_id,
					}});
					my $shell_call = $anvil->data->{path}{exe}{pcs}." node attribute ".$node_name." ".$attribute_id."=";
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
					my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						output      => $output,
						return_code => $return_code,
					}});
				}
			}
		}
	}
	
	return(0);
}
# This checks to see if the server is running and, if so, stops it. Once stopped, the resource is deleted.
sub remove_from_pacemaker
{

@ -244,6 +244,43 @@ sub do_poweroff
host_status => $task eq "poweroff" ? "rebooting" : "stopping",
});
# Wait if anvil-version-change is running.
my $next_log = time - 1;
my $waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
next_log => $next_log,
waiting => $waiting,
}});
while ($waiting)
{
my $pids = $anvil->System->pids({program_name => $anvil->data->{path}{exe}{'anvil-version-changes'}});
my $avc_instances = @{$pids};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { avc_instances => $avc_instances }});
if ($avc_instances)
{
if (time > $next_log)
{
my $say_pids = "";
foreach my $pid (@{$pids})
{
$say_pids .= $pid.", ";
}
$say_pids =~ s/, $//;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0325", variables => { pids => $say_pids }});
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
}
sleep 10;
}
else
{
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
# If we have a job UUID, mark that we're done.
if ($job_uuid)
{

@ -2242,13 +2242,16 @@ sub check_drbd_peer_access
target => $this_host,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
foreach my $preferred_network ("bcn", "mn", "ifn", "sn", "any")
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
{
next if $access;
next if $network_name !~ /^$preferred_network/;
if (($network_name !~ /^$preferred_network/) && ($preferred_network ne "any"))
{
next;
}
my $target_ip = $anvil->data->{network_access}{$network_name}{target_ip_address};
my $test_access = $anvil->Remote->test_access({target => $target_ip});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {

@ -17,11 +17,12 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
my $anvil = Anvil::Tools->new();
$anvil->data->{switches}{detailed} = 0;
$anvil->Get->switches();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"switches::detailed" => $anvil->data->{switches}{detailed},
}});
# Read switches
$anvil->Get->switches({list => [
"detailed",
"machine"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
@ -70,13 +71,15 @@ sub collect_anvil_data
"s2:anvil_uuid" => $anvil_uuid,
}});
$anvil->data->{anvil_data}{$anvil_name}{anvil_uuid} = $anvil_uuid;
$anvil->data->{anvil_data}{$anvil_name}{description} = $anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_description};
$anvil->data->{anvil_data}{$anvil_name}{node1_host_uuid} = $anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_node1_host_uuid};
$anvil->data->{anvil_data}{$anvil_name}{node2_host_uuid} = $anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_node2_host_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:anvil_data::${anvil_name}::anvil_description" => $anvil->data->{anvil_data}{$anvil_name}{description},
"s2:anvil_data::${anvil_name}::node1_host_uuid" => $anvil->data->{anvil_data}{$anvil_name}{node1_host_uuid},
"s3:anvil_data::${anvil_name}::node2_host_uuid" => $anvil->data->{anvil_data}{$anvil_name}{node2_host_uuid},
"s1:anvil_data::${anvil_name}::anvil_uuid" => $anvil->data->{anvil_data}{$anvil_name}{anvil_uuid},
"s2:anvil_data::${anvil_name}::anvil_description" => $anvil->data->{anvil_data}{$anvil_name}{description},
"s3:anvil_data::${anvil_name}::node1_host_uuid" => $anvil->data->{anvil_data}{$anvil_name}{node1_host_uuid},
"s4:anvil_data::${anvil_name}::node2_host_uuid" => $anvil->data->{anvil_data}{$anvil_name}{node2_host_uuid},
}});
if (length($anvil_name) > $anvil->data->{longest}{anvil_name})
@ -141,15 +144,25 @@ sub collect_anvil_data
$bridges =~ s/, $//;
# Store
$anvil->data->{anvil_data}{$anvil_name}{cpu_cores} = $cpu_cores;
$anvil->data->{anvil_data}{$anvil_name}{cpu_threads} = $cpu_threads;
$anvil->data->{anvil_data}{$anvil_name}{cpu_string} = $say_cpu;
$anvil->data->{anvil_data}{$anvil_name}{ram_used_string} = $say_ram_hardware;
$anvil->data->{anvil_data}{$anvil_name}{ram_available} = $ram_available;
$anvil->data->{anvil_data}{$anvil_name}{ram_used} = $ram_used;
$anvil->data->{anvil_data}{$anvil_name}{ram_hardware} = $ram_hardware;
$anvil->data->{anvil_data}{$anvil_name}{ram_free_string} = $say_ram_available;
$anvil->data->{anvil_data}{$anvil_name}{bridge_string} = $bridges;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:anvil_data::${anvil_name}::cpu_string" => $anvil->data->{anvil_data}{$anvil_name}{cpu_string},
"s2:anvil_data::${anvil_name}::ram_used_string" => $anvil->data->{anvil_data}{$anvil_name}{ram_used_string},
"s3:anvil_data::${anvil_name}::ram_free_string" => $anvil->data->{anvil_data}{$anvil_name}{ram_free_string},
"s4:anvil_data::${anvil_name}::bridge_string" => $anvil->data->{anvil_data}{$anvil_name}{bridge_string},
"s1:anvil_data::${anvil_name}::cpu_cores" => $anvil->data->{anvil_data}{$anvil_name}{cpu_cores},
"s2:anvil_data::${anvil_name}::cpu_threads" => $anvil->data->{anvil_data}{$anvil_name}{cpu_threads},
"s3:anvil_data::${anvil_name}::cpu_string" => $anvil->data->{anvil_data}{$anvil_name}{cpu_string},
"s4:anvil_data::${anvil_name}::ram_used_string" => $anvil->data->{anvil_data}{$anvil_name}{ram_used_string},
"s5:anvil_data::${anvil_name}::ram_available" => $anvil->data->{anvil_data}{$anvil_name}{ram_available},
"s6:anvil_data::${anvil_name}::ram_used" => $anvil->data->{anvil_data}{$anvil_name}{ram_used},
"s7:anvil_data::${anvil_name}::ram_hardware" => $anvil->data->{anvil_data}{$anvil_name}{ram_hardware},
"s8:anvil_data::${anvil_name}::ram_free_string" => $anvil->data->{anvil_data}{$anvil_name}{ram_free_string},
"s9:anvil_data::${anvil_name}::bridge_string" => $anvil->data->{anvil_data}{$anvil_name}{bridge_string},
}});
if (length($anvil->data->{anvil_data}{$anvil_name}{cpu_string}) > $anvil->data->{longest}{host_cpu_string})
@ -199,11 +212,17 @@ sub collect_anvil_data
's5:sg_used' => $anvil->Convert->add_commas({number => $sg_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $sg_used}).")",
}});
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_used_size} = $anvil->Convert->bytes_to_human_readable({'bytes' => $sg_used});
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_free_size} = $anvil->Convert->bytes_to_human_readable({'bytes' => $free_size});
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{storage_group_uuid} = $storage_group_uuid;
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{used_size} = $sg_used;
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{free_size} = $free_size;
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_used_size} = $anvil->Convert->bytes_to_human_readable({'bytes' => $sg_used});
$anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_free_size} = $anvil->Convert->bytes_to_human_readable({'bytes' => $free_size});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:anvil_data::${anvil_name}::storage_group::${storage_group_name}::say_used_size" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_used_size},
"s2:anvil_data::${anvil_name}::storage_group::${storage_group_name}::say_free_size" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_free_size},
"s1:anvil_data::${anvil_name}::storage_group::${storage_group_name}::storage_group_uuid" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{storage_group_uuid},
"s2:anvil_data::${anvil_name}::storage_group::${storage_group_name}::used_size" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{used_size},
"s3:anvil_data::${anvil_name}::storage_group::${storage_group_name}::free_size" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{free_size},
"s4:anvil_data::${anvil_name}::storage_group::${storage_group_name}::say_used_size" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_used_size},
"s5:anvil_data::${anvil_name}::storage_group::${storage_group_name}::say_free_size" => $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_free_size},
}});
if (length($storage_group_name) > $anvil->data->{longest}{storage_group})
@ -237,6 +256,77 @@ sub show_anvils
{
my ($anvil) = @_;
if ($anvil->data->{switches}{machine})
{
show_anvils_machine($anvil);
}
else
{
show_anvils_human($anvil);
}
return(0);
}
sub show_anvils_machine
{
	# Appends the machine-readable (XML) report of all known Anvil! nodes to
	# 'show::xml', closes the XML document opened by show_servers_machine()
	# (</nodes></resources>), and prints the completed document to STDOUT.
	# Takes the Anvil::Tools handle as its only argument; always returns 0.
	my ($anvil) = @_;
	
	$anvil->data->{show}{xml} .= "	<nodes>\n";
	foreach my $anvil_name (sort {$a cmp $b} keys %{$anvil->data->{anvil_data}})
	{
		my $anvil_uuid        = $anvil->data->{anvil_data}{$anvil_name}{anvil_uuid};
		my $anvil_description = $anvil->data->{anvil_data}{$anvil_name}{description};
		# Strip wrapping quotes, then escape any remaining double-quotes so the
		# description is safe inside an XML attribute. (The previous
		# 's/"/\"/g' was a no-op; '\"' on the replacement side is just '"'.)
		$anvil_description =~ s/^"//;
		$anvil_description =~ s/"$//;
		$anvil_description =~ s/"/&quot;/g;
		my $cpu_cores        = $anvil->data->{anvil_data}{$anvil_name}{cpu_cores};
		my $cpu_threads      = $anvil->data->{anvil_data}{$anvil_name}{cpu_threads};
		my $ram_available    = $anvil->data->{anvil_data}{$anvil_name}{ram_available};
		my $ram_available_hr = $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_available});
		my $ram_used         = $anvil->data->{anvil_data}{$anvil_name}{ram_used};
		my $ram_used_hr      = $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used});
		my $ram_hardware     = $anvil->data->{anvil_data}{$anvil_name}{ram_hardware};
		my $ram_hardware_hr  = $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_hardware});
		my $bridge_string    = $anvil->data->{anvil_data}{$anvil_name}{bridge_string};
		# Note: 'hardware-hr' now uses $ram_hardware_hr; it previously repeated
		# the available-hr value, leaving $ram_hardware_hr unused.
		$anvil->data->{show}{xml} .= "		<node name=\"".$anvil_name."\" uuid=\"".$anvil_uuid."\" description=\"".$anvil_description."\">
			<cpu cores=\"".$cpu_cores."\" threads=\"".$cpu_threads."\" />
			<ram hardware=\"".$ram_hardware."\" hardware-hr=\"".$ram_hardware_hr."\" used=\"".$ram_used."\" used-hr=\"".$ram_used_hr."\" available=\"".$ram_available."\" available-hr=\"".$ram_available_hr."\" />
			<bridges available=\"".$bridge_string."\" />
";
		foreach my $storage_group_name (sort {$a cmp $b} keys %{$anvil->data->{anvil_data}{$anvil_name}{storage_group}})
		{
			my $storage_group_uuid = $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{storage_group_uuid};
			my $used_bytes         = $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{used_size};
			my $free_bytes         = $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{free_size};
			my $say_used_size      = $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_used_size};
			my $say_free_size      = $anvil->data->{anvil_data}{$anvil_name}{storage_group}{$storage_group_name}{say_free_size};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:storage_group_name' => $storage_group_name,
				's2:storage_group_uuid' => $storage_group_uuid,
				's3:used_bytes'         => $used_bytes,
				's4:free_bytes'         => $free_bytes,
				's5:say_used_size'      => $say_used_size,
				's6:say_free_size'      => $say_free_size,
			}});
			$anvil->data->{show}{xml} .= "			<storage-group name=\"".$storage_group_name."\" uuid=\"".$storage_group_uuid."\" used-bytes=\"".$used_bytes."\" used-hr=\"".$say_used_size."\" free-bytes=\"".$free_bytes."\" free-hr=\"".$say_free_size."\" />\n";
		}
		$anvil->data->{show}{xml} .= "		</node>\n";
	}
	$anvil->data->{show}{xml} .= "	</nodes>
</resources>
";
	print $anvil->data->{show}{xml};
	
	return(0);
}
sub show_anvils_human
{
my ($anvil) = @_;
my $anvil_header = $anvil->Words->string({key => "header_0081"});
my $longest_anvil_name = length($anvil_header) > $anvil->data->{longest}{anvil_name} ? length($anvil_header) : $anvil->data->{longest}{anvil_name};
my $description_header = $anvil->Words->string({key => "header_0074"});
@ -381,7 +471,6 @@ sub show_anvils
push @{$anvil->data->{display}{lines}}, $blank_lead.$storage_groups->[$i];
}
}
}
push @{$anvil->data->{display}{lines}}, $break_line;
@ -392,6 +481,96 @@ sub show_servers
{
my ($anvil) = @_;
if ($anvil->data->{switches}{machine})
{
show_servers_machine($anvil);
}
else
{
show_servers_human($anvil);
}
return(0);
}
sub show_servers_machine
{
	# Builds the machine-readable (XML) report of all known servers into
	# 'show::xml'. This opens the XML document ('<?xml ...>' / '<resources>');
	# the document is closed and printed later by show_anvils_machine(), so this
	# sub must run first when --machine is used. Takes the Anvil::Tools handle
	# as its only argument; always returns 0.
	my ($anvil) = @_;
	
	# Note: this is an assignment (not '.='), so it (re)initializes the XML
	# buffer for the whole report.
	$anvil->data->{show}{xml} = "<?xml version=\"1.0\" ?>
<resources>
	<servers>\n";
	foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{server_data}})
	{
		# A server name could, in theory, exist on more than one Anvil!, so the
		# data is keyed by both name and UUID.
		foreach my $server_uuid (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}})
		{
			my $anvil_name   = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{anvil_name};
			my $anvil_uuid   = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{anvil_uuid};
			my $cpu_sockets  = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{sockets};
			my $cpu_cores    = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{cores};
			my $cpu_threads  = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{threads};
			# These CPU details may be unset, so default them to empty strings to
			# avoid undef warnings when interpolated into the attributes below.
			my $cpu_model    = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{model_name} ? $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{model_name} : "";
			my $cpu_fallback = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{model_fallback} ? $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{model_fallback} : "";
			my $cpu_match    = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{match} ? $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{match} : "";
			my $cpu_vendor   = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{vendor} ? $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{vendor} : "";
			my $cpu_mode     = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{mode} ? $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{cpu}{mode} : "";
			my $ram_used     = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{ram_used};
			my $ram_used_hr  = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{say_ram_used};
			$anvil->data->{show}{xml} .= "		<server name=\"".$server_name."\" uuid=\"".$server_uuid."\">
			<anvil name=\"".$anvil_name."\" uuid=\"".$anvil_uuid."\" />
			<cpu cores=\"".$cpu_cores."\" threads=\"".$cpu_threads."\" sockets=\"".$cpu_sockets."\" model=\"".$cpu_model."\" fallback=\"".$cpu_fallback."\" match=\"".$cpu_match."\" vendor=\"".$cpu_vendor."\" mode=\"".$cpu_mode."\" />
			<ram used_bytes=\"".$ram_used."\" used_hr=\"".$ram_used_hr."\" />
";
			# Storage: one <storage> per DRBD resource, one <volume> per volume,
			# and one <subnode> per host backing that volume.
			foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}})
			{
				$anvil->data->{show}{xml} .= "			<storage resource=\"".$resource."\">\n";
				foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}})
				{
					my $size          = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{size};
					my $say_size      = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{say_size};
					my $storage_group = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{storage_group};
					$anvil->data->{show}{xml} .= "				<volume number=\"".$volume."\" size-bytes=\"".$size."\" size-hr=\"".$say_size."\" storage-group=\"".$storage_group."\">\n";
					foreach my $drbd_node (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}})
					{
						my $drbd_path        = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}{$drbd_node}{drbd_path};
						my $drbd_path_by_res = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}{$drbd_node}{drbd_path_by_res};
						my $drbd_minor       = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}{$drbd_node}{drbd_minor};
						my $meta_disk        = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}{$drbd_node}{'meta-disk'};
						my $backing_lv       = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}{$drbd_node}{backing_lv};
						my $node_host_uuid   = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{disk}{$resource}{$volume}{node}{$drbd_node}{host_uuid};
						$anvil->data->{show}{xml} .= "					<subnode name=\"".$drbd_node."\" host-uuid=\"".$node_host_uuid."\" path=\"".$drbd_path."\" res-path=\"".$drbd_path_by_res."\" minor=\"".$drbd_minor."\" meta-data=\"".$meta_disk."\" lv=\"".$backing_lv."\" />\n";
					}
					$anvil->data->{show}{xml} .= "				</volume>\n";
				}
				$anvil->data->{show}{xml} .= "			</storage>\n";
			}
			# Network: one <network> per bridge, one <mac> per interface on it.
			foreach my $bridge (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{net}})
			{
				$anvil->data->{show}{xml} .= "			<network bridge=\"".$bridge."\">\n";
				foreach my $alias (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{net}{$bridge}})
				{
					foreach my $mac (sort {$a cmp $b} keys %{$anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{net}{$bridge}{$alias}})
					{
						my $model = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{net}{$bridge}{$alias}{$mac}{model};
						my $ip    = $anvil->data->{server_data}{$server_name}{server_uuid}{$server_uuid}{net}{$bridge}{$alias}{$mac}{ip};
						$anvil->data->{show}{xml} .= "				<mac alias=\"".$alias."\" address=\"".$mac."\" model=\"".$model."\" ip=\"".$ip."\" />\n";
					}
				}
				$anvil->data->{show}{xml} .= "			</network>\n";
			}
			$anvil->data->{show}{xml} .= "		</server>\n";
		}
	}
	# Close <servers> only; <resources> is closed by show_anvils_machine().
	$anvil->data->{show}{xml} .= "	</servers>\n";
	
	return(0);
}
sub show_servers_human
{
my ($anvil) = @_;
my $server_header = $anvil->Words->string({key => "header_0065"});
my $longest_server_name = length($server_header) > $anvil->data->{longest}{server_name} ? length($server_header) : $anvil->data->{longest}{server_name};
my $anvil_header = $anvil->Words->string({key => "brand_0002"});

@ -39,6 +39,16 @@ $| = 1;
my $anvil = Anvil::Tools->new();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Read switches
$anvil->Get->switches({list => [], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
### TODO: Remove this before final release
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
##########################################
# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
@ -48,27 +58,14 @@ if (($< != 0) && ($> != 0))
$anvil->nice_exit({exit_code => 1});
}
# If dnf is running, hold.
$anvil->System->wait_on_dnf();
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{disable} = "";
$anvil->data->{switches}{enable} = "";
$anvil->data->{switches}{force} = "";
$anvil->data->{switches}{'local'} = "";
$anvil->data->{switches}{status} = "";
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::disable' => $anvil->data->{switches}{disable},
'switches::enable' => $anvil->data->{switches}{enable},
'switches::force' => $anvil->data->{switches}{force},
'switches::local' => $anvil->data->{switches}{'local'},
'switches::status' => $anvil->data->{switches}{status},
}});
# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections})
{
@ -134,11 +131,11 @@ sub check_drbd
my $peer_password = $anvil->data->{sys}{peer_password};
my $peer_ip_address = $anvil->data->{sys}{peer_target_ip};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
short_host_name => $short_host_name,
peer_host_uuid => $peer_host_uuid,
peer_short_host_name => $peer_short_host_name,
peer_password => $anvil->Log->is_secure($peer_password),
peer_ip_address => $peer_ip_address,
's1:short_host_name' => $short_host_name,
's2:peer_host_uuid' => $peer_host_uuid,
's3:peer_short_host_name' => $peer_short_host_name,
's4:peer_password' => $anvil->Log->is_secure($peer_password),
's5:peer_ip_address' => $peer_ip_address,
}});
# Get the list of resources up on the peer.
@ -151,21 +148,20 @@ sub check_drbd
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}})
{
my $peer_is_me = $anvil->Network->is_local({host => $peer_name});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peer_host_uuid => $peer_host_uuid,
peer_short_host_name => $peer_short_host_name,
peer_password => $anvil->Log->is_secure($peer_password),
peer_ip_address => $peer_ip_address,
's1:peer_name' => $peer_name,
's2:peer_is_me' => $peer_is_me,
}});
my $peer_connection_state = $anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}{$peer_name}{'connection-state'};
my $local_connection_state = exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peer_connection_state => $peer_connection_state,
local_connection_state => $local_connection_state,
's1:peer_connection_state' => $peer_connection_state,
's2:local_connection_state' => $local_connection_state,
}});
if (($peer_connection_state =~ /Connecting/i) && ($local_connection_state !~ /StandAlone/i))
@ -596,6 +592,13 @@ sub prerun_checks
host_type => $host_type,
}});
if (($host_type eq "node") or ($host_type eq "dr"))
{
# Call DRBD->get_status because, if we're just starting up and the kernel module needs to be
# built, do it before we start calling scan agents.
$anvil->DRBD->get_status({debug => 2});
}
if ($host_type ne "node")
{
# We're done.
@ -629,6 +632,8 @@ sub prerun_checks
"sys::peer_password" => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}),
}});
# We don't use this anymore, it's managed by the 'anvil-safe-start.service' daemon.
=cut
# Are we being asked to enable or disable?
my $nodes = [$host_uuid];
my $set_to = 1;
@ -742,6 +747,7 @@ sub prerun_checks
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
$anvil->nice_exit({exit_code => 0});
}
=cut
# Is another instance running?
my $pids = $anvil->System->pids({
@ -754,10 +760,12 @@ sub prerun_checks
if ($other_instances)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233", variables => { program => $THIS_FILE }});
sleep 2;
$anvil->nice_exit({exit_code => 0});
}
=cut
# Last test, enabled or forced?
if (not $local_enabled)
{
@ -775,6 +783,7 @@ sub prerun_checks
$anvil->nice_exit({exit_code => 0});
}
}
=cut
return(0);
}

@ -29,19 +29,16 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
$| = 1;
my $anvil = Anvil::Tools->new();
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{'poweroff'} = "";
$anvil->data->{switches}{'power-off'} = ""; # By default, the node is withdrawn. With this switch, the node will power off as well.
$anvil->data->{switches}{'stop-reason'} = ""; # Optionally used to set 'system::stop_reason' reason for this host. Valid values are 'user', 'power' and 'thermal'.
$anvil->data->{switches}{'stop-servers'} = ""; # Default behaviour is to migrate servers to the peer, if the peer is up. This overrides that and forces hosted servers to shut down.
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::poweroff' => $anvil->data->{switches}{'poweroff'},
'switches::power-off' => $anvil->data->{switches}{'power-off'},
'switches::stop-reason' => $anvil->data->{switches}{'stop-reason'},
'switches::stop-servers' => $anvil->data->{switches}{'stop-servers'},
}});
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"no-db",
"poweroff",
"power-off",
"stop-reason",
"stop-servers"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Let 'poweroff' work as a mis-spell of 'power-off'
if (($anvil->data->{switches}{'poweroff'}) && (not $anvil->data->{switches}{'power-off'}))
@ -63,15 +60,27 @@ if (($< != 0) && ($> != 0))
$anvil->nice_exit({exit_code => 1});
}
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
if ($anvil->data->{switches}{'no-db'})
{
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
$anvil->data->{sys}{database}{connections} = 0;
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'sys::database::connections' => $anvil->data->{sys}{database}{connections},
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
}});
}
else
{
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
}
# If we still don't have a job-uuid, go into interactive mode.
@ -115,12 +124,13 @@ if ($anvil->data->{switches}{'job-uuid'})
}
}
# Make sure we're in an Anvil!
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
if (not $anvil->data->{sys}{anvil_uuid})
# Make sure we're a subnode or DR host
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type ne "node") && ($host_type ne "dr"))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
$anvil->Job->update_progress({progress => 100, message => "error_0260"});
$anvil->Job->update_progress({progress => 100, message => "error_0260"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 1});
}
@ -154,7 +164,7 @@ if ($anvil->data->{switches}{'power-off'})
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0692!#" }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0325"});
$anvil->Job->update_progress({progress => 100, message => "job_0325"});
$anvil->Job->update_progress({progress => 100, message => "job_0325"}) if $anvil->data->{switches}{'job-uuid'};
# Set the stop reason.
if ($anvil->data->{switches}{'stop-reason'})
@ -216,7 +226,7 @@ sub stop_cluster
{
# Cluster has stopped.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
$anvil->Job->update_progress({progress => 5, message => "job_0313"});
$anvil->Job->update_progress({progress => 5, message => "job_0313"}) if $anvil->data->{switches}{'job-uuid'};
}
else
{
@ -225,7 +235,7 @@ sub stop_cluster
{
# Stop pacemaker now.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0323"});
$anvil->Job->update_progress({progress => 70, message => "job_0323"});
$anvil->Job->update_progress({progress => 70, message => "job_0323"}) if $anvil->data->{switches}{'job-uuid'};
### NOTE: '--force' is needed or else sole-running nodes can't exit
### (complains about the loss of quorum)
@ -243,7 +253,7 @@ sub stop_cluster
else
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0324"});
$anvil->Job->update_progress({progress => 80, message => "job_0324"});
$anvil->Job->update_progress({progress => 80, message => "job_0324"}) if $anvil->data->{switches}{'job-uuid'};
}
}
if ($waiting)
@ -279,31 +289,32 @@ sub process_servers
my $can_migrate = 1;
if ($server_count)
{
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:problem' => $problem,
's2:cib::parsed::local::ready' => $anvil->data->{cib}{parsed}{'local'}{ready},
's3:cib::parsed::peer::ready' => $anvil->data->{cib}{parsed}{peer}{ready},
}});
if ($problem)
{
# We're not in the node's cluster, we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
elsif ((not $anvil->data->{cib}{parsed}{'local'}{ready}) or (not $anvil->data->{cib}{parsed}{peer}{ready}))
if ($anvil->Get->host_type() eq "dr")
{
# One of the subnodes is not in the cluster, so we can't migrate.
# No pacemaker, only stop servers.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "job_0470"});
}
if ((not $anvil->data->{switches}{'stop-servers'}) && (not $can_migrate))
else
{
# We would have to stop the servers, and the user didn't tell us to do that, abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0372"});
$anvil->Job->update_progress({progress => 100, message => "error_0372"});
$anvil->nice_exit({exit_code => 1});
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:problem' => $problem,
's2:cib::parsed::local::ready' => $anvil->data->{cib}{parsed}{'local'}{ready},
's3:cib::parsed::peer::ready' => $anvil->data->{cib}{parsed}{peer}{ready},
}});
if ($problem)
{
# We're not in the node's cluster, we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
elsif ((not $anvil->data->{cib}{parsed}{'local'}{ready}) or (not $anvil->data->{cib}{parsed}{peer}{ready}))
{
# One of the subnodes is not in the cluster, so we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
}
}
@ -311,13 +322,20 @@ sub process_servers
{
# Tell the user we're about to shut down servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0320"});
$anvil->Job->update_progress({progress => 10, message => "job_0320"});
$anvil->Job->update_progress({progress => 10, message => "job_0320"}) if $anvil->data->{switches}{'job-uuid'};
}
else
elsif ($can_migrate)
{
# Tell the user we're about to migrate servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0321"});
$anvil->Job->update_progress({progress => 10, message => "job_0321"});
$anvil->Job->update_progress({progress => 10, message => "job_0321"}) if $anvil->data->{switches}{'job-uuid'};
}
else
{
# We would have to stop the servers, and the user didn't tell us to do that, abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0372"});
$anvil->Job->update_progress({progress => 100, message => "error_0372"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 1});
}
while ($waiting)
@ -351,7 +369,7 @@ sub process_servers
{
# It's running despite the cluster being own, stop it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0419", variables => { server => $server_name }});
$anvil->Job->update_progress({progress => $progress, message => "job_0419,!!server!".$server_name."!!"});
$anvil->Job->update_progress({progress => $progress, message => "job_0419,!!server!".$server_name."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server_name,
@ -365,7 +383,7 @@ sub process_servers
{
# Hit the power button again.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0420", variables => { server => $server_name }});
$anvil->Job->update_progress({progress => $progress, message => "job_0420,!!server!".$server_name."!!"});
$anvil->Job->update_progress({progress => $progress, message => "job_0420,!!server!".$server_name."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server_name,
@ -407,7 +425,7 @@ sub process_servers
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
$anvil->Job->update_progress({progress => 80, message => "job_0313"});
$anvil->Job->update_progress({progress => 80, message => "job_0313"}) if $anvil->data->{switches}{'job-uuid'};
}
else
{
@ -436,7 +454,7 @@ sub process_servers
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0315", variables => { server => $server }});
$anvil->Job->update_progress({progress => 20, message => "job_0315,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => 20, message => "job_0315,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
}
elsif ($host_name eq $local_name)
{
@ -454,7 +472,7 @@ sub process_servers
{
# Use PCS.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0316", variables => { server => $server }});
$anvil->Job->update_progress({progress => 20, message => "job_0316,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => 20, message => "job_0316,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->Cluster->shutdown_server({
debug => 2,
server => $server,
@ -473,7 +491,7 @@ sub process_servers
{
# Use virsh
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0317", variables => { server => $server }});
$anvil->Job->update_progress({progress => 20, message => "job_0317,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => 20, message => "job_0317,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server,
@ -495,7 +513,7 @@ sub process_servers
server => $server,
node => $peer_name,
}});
$anvil->Job->update_progress({progress => 20, message => "job_0318,!!server!".$server."!!,!!node!".$peer_name."!!"});
$anvil->Job->update_progress({progress => 20, message => "job_0318,!!server!".$server."!!,!!node!".$peer_name."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->Cluster->migrate_server({
server => $server,
node => $peer_name,
@ -512,7 +530,7 @@ sub process_servers
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0319"});
$anvil->Job->update_progress({progress => 30, message => "job_0319"});
$anvil->Job->update_progress({progress => 30, message => "job_0319"}) if $anvil->data->{switches}{'job-uuid'};
return(0);
}
@ -523,7 +541,7 @@ sub wait_on_drbd
my ($anvil) = @_;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0322"});
$anvil->Job->update_progress({progress => 40, message => "job_0322"});
$anvil->Job->update_progress({progress => 40, message => "job_0322"}) if $anvil->data->{switches}{'job-uuid'};
my $short_host_name = $anvil->Get->short_host_name();
my $waiting = 1;
while ($waiting)
@ -557,7 +575,7 @@ sub wait_on_drbd
resource => $server_name,
volume => $volume,
}});
$anvil->Job->update_progress({progress => 50, message => "job_0312,!!peer_host!".$peer_name."!!,!!resource!".$server_name."!!,!!volume!".$volume."!!"});
$anvil->Job->update_progress({progress => 50, message => "job_0312,!!peer_host!".$peer_name."!!,!!resource!".$server_name."!!,!!volume!".$volume."!!"}) if $anvil->data->{switches}{'job-uuid'};
}
}
}
@ -570,7 +588,7 @@ sub wait_on_drbd
# All servers should be down now, so stop DRBD.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0314"});
$anvil->Job->update_progress({progress => 60, message => "job_0314"});
$anvil->Job->update_progress({progress => 60, message => "job_0314"}) if $anvil->data->{switches}{'job-uuid'};
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." down all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});

@ -27,30 +27,41 @@ $| = 1;
my $anvil = Anvil::Tools->new();
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{'no-wait'} = ""; # When set, we'll not wait when we shut down a single server
$anvil->data->{switches}{'server'} = "";
$anvil->data->{switches}{'server-uuid'} = "";
$anvil->data->{switches}{'wait'} = ""; # When set, we'll wait for each server to shut down when using '--all'
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::no-wait' => $anvil->data->{switches}{'no-wait'},
'switches::server' => $anvil->data->{switches}{'server'},
'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
'switches::wait' => $anvil->data->{switches}{'wait'},
}});
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"no-db",
"no-wait",
"server",
"server-uuid",
"wait"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
# Connect to DBs.
if ($anvil->data->{switches}{'no-db'})
{
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "log_0743"});
# If there was a job-uuid, clear it.
$anvil->data->{sys}{database}{connections} = 0;
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'sys::database::connections' => $anvil->data->{sys}{database}{connections},
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
}});
}
else
{
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
}
if ($anvil->data->{switches}{'job-uuid'})
@ -88,6 +99,14 @@ if ($anvil->data->{switches}{'job-uuid'})
# Now check that we have a server. If it's a server_uuid, read the server name.
if ($anvil->data->{switches}{'server-uuid'})
{
# Do we have DB connection(s)?
if (not $anvil->data->{sys}{database}{connections})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0265"});
$anvil->Job->update_progress({progress => 100, message => "error_0265"});
$anvil->nice_exit({exit_code => 1});
}
# Convert the server_uuid to a server_name.
my $query = "SELECT server_name FROM servers WHERE server_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'server-uuid'}).";";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
@ -125,6 +144,9 @@ if (not $anvil->data->{switches}{'server'})
# Are we a node or DR host?
$anvil->data->{sys}{host_type} = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'sys::host_type' => $anvil->data->{sys}{host_type},
}});
if (($anvil->data->{sys}{host_type} ne "node") && ($anvil->data->{sys}{host_type} ne "dr"))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0264"});
@ -132,26 +154,28 @@ if (($anvil->data->{sys}{host_type} ne "node") && ($anvil->data->{sys}{host_type
$anvil->nice_exit({exit_code => 1});
}
### TODO: Add DR support. For now, this only works on Nodes in a cluster
if ($anvil->data->{sys}{host_type} eq "dr")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0265"});
$anvil->Job->update_progress({progress => 100, message => "error_0265"});
$anvil->nice_exit({exit_code => 1});
}
# Make sure that we're in an Anvil! system.
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
if (not $anvil->data->{sys}{anvil_uuid})
$anvil->data->{sys}{anvil_uuid} = "";
if (($anvil->data->{sys}{host_type} eq "node") && ($anvil->data->{sys}{database}{connections}))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
$anvil->Job->update_progress({progress => 100, message => "error_0260"});
$anvil->nice_exit({exit_code => 1});
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'sys::anvil_uuid' => $anvil->data->{sys}{anvil_uuid},
}});
if (not $anvil->data->{sys}{anvil_uuid})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
$anvil->Job->update_progress({progress => 100, message => "error_0260"});
$anvil->nice_exit({exit_code => 1});
}
}
# This is copied from anvil-boot-server, but it works here as well. We can't use 'pcs' without pacemaker
# being up.
wait_for_pacemaker($anvil);
if ($anvil->data->{sys}{host_type} eq "node")
{
wait_for_pacemaker($anvil);
}
# If 'server' is 'all', shut down all servers.
if (lc($anvil->data->{switches}{'server'}) eq "all")
@ -165,7 +189,7 @@ else
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});
$anvil->Job->update_progress({progress => 100, message => "job_0281"});
$anvil->Job->update_progress({progress => 100, message => "job_0281"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 0});
@ -195,20 +219,20 @@ sub wait_for_pacemaker
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0279"});
$anvil->Job->update_progress({progress => 15, message => "job_0279"});
$anvil->Job->update_progress({progress => 15, message => "job_0279"}) if $anvil->data->{switches}{'job-uuid'};
}
else
{
# Node isn't ready yet.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0278"});
$anvil->Job->update_progress({progress => 10, message => "job_0278"});
$anvil->Job->update_progress({progress => 10, message => "job_0278"}) if $anvil->data->{switches}{'job-uuid'};
}
}
else
{
# Cluster hasn't started.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"});
$anvil->Job->update_progress({progress => 5, message => "job_0277"});
$anvil->Job->update_progress({progress => 5, message => "job_0277"}) if $anvil->data->{switches}{'job-uuid'};
}
if ($waiting)
{
@ -233,7 +257,7 @@ sub shutdown_server
{
# Nope.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0548", variables => { server => $server }});
$anvil->Job->update_progress({progress => 100, message => "log_0548,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => 100, message => "log_0548,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 1});
}
@ -243,24 +267,42 @@ sub shutdown_server
{
# It's off already
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0284", variables => { server => $server }});
$anvil->Job->update_progress({progress => $progress, message => "job_0284,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => $progress, message => "job_0284,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
return(0);
}
# Now shut down.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0289", variables => { server => $server }});
$anvil->Job->update_progress({progress => $progress, message => "job_0289,!!server!".$server."!!"});
my $problem = $anvil->Cluster->shutdown_server({
debug => 2,
server => $server,
'wait' => $wait,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
$anvil->Job->update_progress({progress => $progress, message => "job_0289,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
my $problem = 0;
if ($anvil->Get->host_type eq "dr")
{
# Shut down using virsh. Invert the return.
my $success = $anvil->Server->shutdown_virsh({
debug => 2,
wait_time => $wait ? 0 : 1,
});
$problem = $success ? 0 : 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
success => $success,
problem => $problem,
}});
}
else
{
$problem = $anvil->Cluster->shutdown_server({
debug => 2,
server => $server,
'wait' => $wait,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
}
if ($problem)
{
# Failed, abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0268", variables => { server => $server }});
$anvil->Job->update_progress({progress => 100, message => "error_0268,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => 100, message => "error_0268,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 1});
}
else
@ -269,13 +311,13 @@ sub shutdown_server
{
# Stopped!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0285", variables => { server => $server }});
$anvil->Job->update_progress({progress => $progress, message => "job_0285,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => $progress, message => "job_0285,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
}
else
{
# Stop requested.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0286", variables => { server => $server }});
$anvil->Job->update_progress({progress => $progress, message => "job_0286,!!server!".$server."!!"});
$anvil->Job->update_progress({progress => $progress, message => "job_0286,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
}
}
@ -289,7 +331,7 @@ sub shutdown_all_servers
### TODO: Manage the stop order here, inverse of boot order.
# We top out at 90, bottom is 20.
my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}};
my $increment = int(70 / $server_count);
my $increment = $server_count ? int(70 / $server_count) : 70;
my $percent = 15;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server_count => $server_count,

@ -29,9 +29,8 @@ my $anvil = Anvil::Tools->new();
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"task",
"resource",
], man => $THIS_FILE});
"task"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
@ -57,6 +56,7 @@ if ($anvil->data->{switches}{'job-uuid'})
message => "message_0311",
});
}
if ($anvil->data->{switches}{task} eq "refresh-drbd-resource")
{
refresh_drbd_resource($anvil);
@ -117,4 +117,4 @@ sub refresh_drbd_resource
$anvil->nice_exit({exit_code => 0});
return(0);
}
}

@ -37,29 +37,64 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
my $anvil = Anvil::Tools->new();
### TODO: Remove this before final release
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
##########################################
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"clear-cache",
"no-db",
"no-reboot",
"reboot"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Log that we've started.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Connect to DBs.
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
if ($anvil->data->{switches}{'no-db'})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "log_0743"});
# If there was a job-uuid, clear it.
$anvil->data->{sys}{database}{connections} = 0;
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'sys::database::connections' => $anvil->data->{sys}{database}{connections},
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
}});
}
else
{
# No databases, exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"});
$anvil->nice_exit({exit_code => 1});
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"});
$anvil->nice_exit({exit_code => 1});
}
}
$anvil->data->{jobs}{job_uuid} = "";
if ($anvil->data->{switches}{'job-uuid'})
{
# See if another instance is running. If so, sleep for 10 seconds and then exit. The other instance
# could be the '--no-db' run we're about to clobber.
my $pids = $anvil->System->pids({
ignore_me => 1,
program_name => $THIS_FILE,
});
my $other_instances = @{$pids};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});
if ($other_instances)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233", variables => { program => $THIS_FILE }});
sleep 10;
$anvil->nice_exit({exit_code => 0});
}
# Load the job details. If anything is returned, there was a problem.
my $return = $anvil->Job->get_job_details({job_uuid => $anvil->data->{switches}{'job-uuid'}});
if ($return)
@ -70,7 +105,7 @@ if ($anvil->data->{switches}{'job-uuid'})
}
}
# Clea any old runs.
# Clear any old runs.
update_progress($anvil, 0, "clear");
# We'll keep a count of lines and packages to show the user.
@ -84,10 +119,56 @@ update_progress($anvil, 1, "message_0058,!!downloaded!0!!,!!installed!0!!,!!veri
update_progress($anvil, 2, "message_0033");
# Make sure maintenance mode is enabled.
$anvil->System->maintenance_mode({set => 1});
$anvil->System->maintenance_mode({set => 1}) if $anvil->data->{sys}{database}{connections};
# Run the update
run_os_update($anvil, 1, 3);
run_os_update($anvil, 3);
# If we had no database, try to reconnect now that the update is done.
if (not $anvil->data->{sys}{database}{connections})
{
# Start the anvil-daemon, the caller likely called without a DB because we're being updated by
# striker-update-cluster, and so there will be a job waiting for us.
$anvil->System->enable_daemon({now => 1, daemon => "anvil-daemon"});
$anvil->System->enable_daemon({now => 1, daemon => "scancore"});
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
if ($anvil->data->{sys}{database}{connections})
{
# If there's a job for us waiting, mark it as almost done.
my $query = "
SELECT
job_uuid
FROM
jobs
WHERE
job_command LIKE '%".$THIS_FILE."%'
AND
job_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
AND
job_progress = 0
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
my $job_uuid = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
if ($job_uuid)
{
$anvil->data->{jobs}{job_uuid} = $job_uuid;
$anvil->data->{switches}{'job-uuid'} = $job_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'jobs::job_uuid' => $anvil->data->{jobs}{job_uuid},
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
}});
update_progress($anvil, 0, "clear");
update_progress($anvil, 90, "message_0324");
}
}
}
# We're done updating
my $reboot_needed = $anvil->System->reboot_needed({debug => 2});
@ -97,7 +178,7 @@ if ($reboot_needed)
if (not $anvil->data->{switches}{'no-reboot'})
{
# Clear maintenance mode.
$anvil->System->maintenance_mode({set => 0});
$anvil->System->maintenance_mode({set => 0}) if $anvil->data->{sys}{database}{connections};
# Record that we're rebooting so that 'striker-update-cluster' knows to wait for a reboot.
if ($anvil->data->{switches}{'job-uuid'})
@ -117,20 +198,31 @@ WHERE
# Register a job to reboot.
update_progress($anvil, 98, "message_0318");
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'anvil-manage-power'}." --reboot -y".$anvil->Log->switches,
job_data => "",
job_name => "reboot::system",
job_title => "job_0009",
job_description => "job_0006",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# Record that we're going to reboot now.
update_progress($anvil, 100, "message_0317");
if ($anvil->data->{sys}{database}{connections})
{
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'anvil-manage-power'}." --reboot -y".$anvil->Log->switches,
job_data => "",
job_name => "reboot::system",
job_title => "job_0009",
job_description => "job_0006",
job_progress => 0,
job_status => "anvil_startup",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# If we got a job UUID, unlink the cache file.
if (-e $anvil->data->{path}{data}{reboot_cache})
{
unlink $anvil->data->{path}{data}{reboot_cache};
}
}
}
else
{
@ -144,7 +236,7 @@ else
}
# Clear maintenance mode.
$anvil->System->maintenance_mode({set => 0});
$anvil->System->maintenance_mode({set => 0}) if $anvil->data->{sys}{database}{connections};
$anvil->nice_exit({exit_code => 0});
@ -159,16 +251,22 @@ sub update_progress
my ($anvil, $progress, $message) = @_;
# Log the progress percentage.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
progress => $progress,
message => $message,
"jobs::job_uuid" => $anvil->data->{jobs}{job_uuid},
}});
if ($progress > 100)
{
$progress = 100;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { progress => $progress }});
}
if ($anvil->data->{jobs}{job_uuid})
{
$anvil->Job->update_progress({
debug => 3,
debug => 2,
'print' => 1,
progress => $progress,
message => $message,
@ -182,25 +280,22 @@ sub update_progress
# This updates the OS.
sub run_os_update
{
my ($anvil, $try, $progress) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
try => $try,
progress => $progress,
}});
my ($anvil, $progress) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { progress => $progress }});
# This needs to be set to avoid warnings when called without a job-uuid.
$anvil->data->{sys}{last_update} = 0;
# Make sure that, if we're a node, we're out of the cluster.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if ($host_type eq "node")
{
# Call anvil-safe-stop
update_progress($anvil, $progress++, "message_0314");
my $problem = $anvil->Cluster->parse_cib({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
my $problem = $anvil->Cluster->parse_cib();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if (not $problem)
{
# Call anvil-safe-stop
@ -256,8 +351,72 @@ WHERE
update_progress($anvil, 5, "message_0316");
}
# Before we start, do we need to remove our locally build DRBD kernel modules?
my $package_changes = 0;
my $remove_drbd_kmod = 0;
my $shell_call = $anvil->data->{path}{exe}{dnf}." check-update";
open (my $file_handle, $shell_call." 2>&1 |") or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
while(<$file_handle>)
{
chomp;
my $line = $anvil->Words->clean_spaces({string => $_});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if (($line =~ /kmod-drbd/) or ($line =~ /kernel/))
{
# Looks like it.
$remove_drbd_kmod = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remove_drbd_kmod => $remove_drbd_kmod }});
last;
}
}
close $file_handle;
# So, shall we?
if ($remove_drbd_kmod)
{
# Yes, remove
update_progress($anvil, $progress++, "message_0320");
my $versions_to_remove = "";
my $shell_call = $anvil->data->{path}{exe}{dnf}." list installed";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /(kmod-drbd-\d+.*?)\s/)
{
$versions_to_remove .= $1." ";
$package_changes++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
versions_to_remove => $versions_to_remove,
package_changes => $package_changes,
}});
}
}
# Now remove those packages.
update_progress($anvil, $progress++, "message_0321");
$shell_call = $anvil->data->{path}{exe}{dnf}." -y remove ".$versions_to_remove;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
open ($file_handle, $shell_call." 2>&1 |") or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
while(<$file_handle>)
{
chomp;
my $line = $anvil->Words->clean_spaces({string => $_});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
}
close $file_handle;
}
# NOTE: We run this directly to better monitor progress and update the progress.
my $package_changes = 0;
my $transaction_shown = 0;
my $success = 0;
my $to_update = 0;
@ -266,9 +425,9 @@ WHERE
my $next_step = 0;
my $verifying = 0;
my $output = "";
my $shell_call = $anvil->data->{path}{exe}{dnf}." -y update; ".$anvil->data->{path}{exe}{echo}." return_code:\$?";
$shell_call = $anvil->data->{path}{exe}{dnf}." -y update; ".$anvil->data->{path}{exe}{echo}." return_code:\$?";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});
open (my $file_handle, $shell_call." 2>&1 |") or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
open ($file_handle, $shell_call." 2>&1 |") or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
while(<$file_handle>)
{
chomp;
@ -288,7 +447,7 @@ WHERE
{
# Update done, verifying now.
$verifying = 1;
update_progress($anvil, $progress, "message_0038");
update_progress($anvil, $progress++, "message_0038");
}
if ($line =~ /Running transaction/i)
@ -296,7 +455,7 @@ WHERE
# Done downloading
if (not $transaction_shown)
{
update_progress($anvil, $progress, "message_0037");
update_progress($anvil, $progress++, "message_0037");
$transaction_shown = 1;
}
}
@ -312,7 +471,7 @@ WHERE
}
}
if ($line =~ / (\d+) Packages$/i)
if (($line =~ / (\d+) Packages$/i) or ($line =~ / (\d+) Package$/i))
{
my $counted_lines = $1;
$package_changes = $counted_lines;
@ -330,7 +489,7 @@ WHERE
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update_size => $update_size }});
# Ready to install, update to 5%. The next step will count up to 95%.
update_progress($anvil, $progress, "message_0035,!!size!$update_size!!");
update_progress($anvil, $progress++, "message_0035,!!size!$update_size!!");
# The total (reliable) count of events is (to_update * 3), counting '(x/y): '
# (download), 'Upgrading '/'Installing ' and 'Verifying '. We ignore the scriptlet
@ -392,79 +551,11 @@ WHERE
}
close $file_handle;
# If this is the first try and it failed, see if it's a DRBD issue.
if ((not $success) && ($try == 1))
{
# Is this the DRBD kmod issue?
my $remove_drbd_kmod = 0;
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /kmod-drbd/)
{
# Looks like it.
$remove_drbd_kmod = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remove_drbd_kmod => $remove_drbd_kmod }});
last;
}
}
# Clear the old kmod and try the update again.
if ($remove_drbd_kmod)
{
update_progress($anvil, $progress++, "message_0320");
my $versions_to_remove = "";
my $shell_call = $anvil->data->{path}{exe}{dnf}." list installed";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /(kmod-drbd-\d+.*?)\s/)
{
$versions_to_remove .= $1." ";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { versions_to_remove => $versions_to_remove }});
}
}
# Now remove those packages.
update_progress($anvil, $progress++, "message_0321");
$shell_call = $anvil->data->{path}{exe}{dnf}." -y remove ".$versions_to_remove;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
# Now install the new packages.
update_progress($anvil, $progress++, "message_0322");
$shell_call = $anvil->data->{path}{exe}{dnf}." -y install kmod-drbd";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
# Now try again.
update_progress($anvil, $progress++, "message_0323");
run_os_update($anvil, 2, $progress);
return(0);
}
}
# Reload daemons to pick up any changed systemctl daemons.
my ($systemctl_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{systemctl}." daemon-reload", source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { systemctl_output => $systemctl_output, return_code => $return_code }});
$shell_call = $anvil->data->{path}{exe}{systemctl}." daemon-reload";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($systemctl_output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { systemctl_output => $systemctl_output, return_code => $return_code }});
### See if the kernel has been updated.
# Get the newest installed kernel
@ -479,7 +570,7 @@ WHERE
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { installed_kernel => $installed_kernel }});
# Get the running kernel
$shell_call = $anvil->data->{path}{exe}{uname}." -r";
$shell_call = $anvil->data->{path}{exe}{uname}." -r";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
(my $active_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
@ -492,18 +583,17 @@ WHERE
if ($installed_kernel ne $active_kernel)
{
# Reboot needed
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
$anvil->data->{sys}{reboot} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'sys::reboot' => $anvil->data->{sys}{reboot} }});
}
# If we installed and packages, and '--reboot' was given, reboot anyway.
if (($package_changes) && ($anvil->data->{switches}{reboot}))
{
# Reboot needed
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
$anvil->data->{sys}{reboot} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'sys::reboot' => $anvil->data->{sys}{reboot} }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
}
# Did it work?
@ -517,5 +607,15 @@ WHERE
$anvil->nice_exit({exit_code => 3});
}
# Run anvil-version-changes.
$shell_call = $anvil->data->{path}{exe}{'anvil-version-changes'}.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
return(0);
};

@ -67,6 +67,9 @@ $anvil->data->{scancore} = {
$anvil->Storage->read_config();
# If dnf is running, hold.
$anvil->System->wait_on_dnf();
# Read switches
$anvil->Get->switches({list => ["purge", "run-once"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
@ -431,10 +434,12 @@ sub startup_tasks
# This used to call anvil-safe-start, which isn't done here anymore.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
if ($host_type eq "node")
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type eq "node") or ($host_type eq "dr"))
{
# For future use.
# Call DRBD->get_status because, if we're just starting up and the kernel module needs to be
# built, do it before we start calling scan agents.
$anvil->DRBD->get_status({debug => 2});
}
elsif ($host_type eq "striker")
{

@ -296,11 +296,17 @@ sub collect_remote_data
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
$anvil->data->{peer}{$short_host_name}{access}{ip} = "";
$anvil->data->{peer}{$short_host_name}{access}{network} = "";
foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
foreach my $preferred_network ("bcn", "mn", "ifn", "sn", "any")
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { network_name => $network_name }});
if (($network_name !~ /^$preferred_network/) && ($preferred_network ne "any"))
{
next;
}
next if $network_name !~ /^$preferred_network/;
my $target_ip = $anvil->data->{network_access}{$network_name}{target_ip_address};
my $test_access = $anvil->Remote->test_access({target => $target_ip});
@ -373,10 +379,14 @@ sub collect_remote_data
$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
error => $error,
return_code => $return_code,
}});
# Copying the file
@ -403,6 +413,46 @@ sub collect_remote_data
});
}
# Grab cloud-init data, if it exists.
$shell_call = "if [ -e /var/log/cloud-init.log ]; then echo 1; else echo 0; fi";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
if ($output eq "1")
{
print "- Grabbing cloud-init logs... ";
$anvil->Storage->rsync({
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/var/log/cloud-init*",
destination => $target_directory."/",
});
$test_file = $target_directory."/cloud-init.log";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }});
if (not -e $test_file)
{
print "Done.\n";
}
else
{
print "Failed!\n";
print "- For some reason, these files were not collected.\n";
$anvil->Storage->write_file({
file => $test_file,
body => $failed_body,
overwrite => 1,
backup => 0,
});
}
}
# If we're a striker, dump the database also.
if ($this_host_type eq "striker")
{
@ -636,6 +686,20 @@ sub collect_local_data
}});
print "Done!\n";
if (-e "/var/log/cloud-init.log")
{
print "- Grabbing cloud-init logs... ";
$shell_call = $anvil->data->{path}{exe}{cp}." /var/log/cloud-init* ".$target_directory."/";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
print "Done!\n";
}
# If this is a node, grab the shared files.
if ($this_host_type eq "node")
{

@ -34,11 +34,6 @@ $| = 1;
my $anvil = Anvil::Tools->new();
### TODO: Remove this before final release
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
##########################################
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"clear-cache",
@ -46,11 +41,17 @@ $anvil->Get->switches({list => [
"no-reboot",
"reboot",
"reboot-self",
"timeout",
"y",
"yes"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
### TODO: Remove this before final release
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
##########################################
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect();
@ -145,6 +146,8 @@ update_strikers_and_dr($anvil);
# Update DR Host
update_nodes($anvil);
manage_daemons($anvil, "start");
print "Updates complete!\n";
my $host_uuid = $anvil->Get->host_uuid;
@ -202,6 +205,7 @@ sub update_nodes
my $anvil_node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $anvil_node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
my $primary_host_uuid = $anvil->Cluster->get_primary_host_uuid({anvil_uuid => $anvil_uuid});
$primary_host_uuid = $anvil_node1_host_uuid if not $primary_host_uuid;
my $secondary_host_uuid = $primary_host_uuid eq $anvil_node1_host_uuid ? $anvil_node2_host_uuid : $anvil_node1_host_uuid;
my $node1_short_host_name = $anvil->data->{hosts}{host_uuid}{$anvil_node1_host_uuid}{short_host_name};
my $node2_short_host_name = $anvil->data->{hosts}{host_uuid}{$anvil_node2_host_uuid}{short_host_name};
@ -235,18 +239,21 @@ sub update_nodes
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
$anvil->data->{peer}{$short_host_name}{access}{ip} = "";
$anvil->data->{peer}{$short_host_name}{access}{network} = "";
foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
foreach my $preferred_network ("bcn", "mn", "ifn", "sn", "any")
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
{
next if $network_name !~ /^$preferred_network/;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { network_name => $network_name }});
if (($network_name !~ /^$preferred_network/) && ($preferred_network ne "any"))
{
next;
}
my $target_ip = $anvil->data->{network_access}{$network_name}{target_ip_address};
my $test_access = $anvil->Remote->test_access({target => $target_ip});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:network_name' => $network_name,
's2:target_ip' => $target_ip,
's3:test_access' => $test_access,
's1:target_ip' => $target_ip,
's2:test_access' => $test_access,
}});
if ($test_access)
@ -258,7 +265,7 @@ sub update_nodes
"s1:peer::${short_host_name}::access::ip" => $anvil->data->{peer}{$short_host_name}{access}{ip},
"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network},
}});
print "- Access found uver the: [".$network_name."] networking using the IP: [".$target_ip."]\n";
print "- Access found over the: [".$network_name."] networking using the IP: [".$target_ip."]\n";
last;
}
}
@ -310,26 +317,12 @@ sub update_nodes
print "- [ Note ] - If the node has servers that need to be migrated off, or if the node is SyncSource for storage,\n";
print "- [ Note ] - this could take some time to complete.\n";
# Register an anvil-safe-stop job and then wait.
my $job_uuid = $anvil->Database->insert_or_update_jobs({
debug => 2,
job_command => $anvil->data->{path}{exe}{'anvil-safe-stop'},
job_host_uuid => $host_uuid,
job_description => "job_0339",
job_name => "cgi-bin::set_membership::leave",
job_progress => 0,
job_title => "job_0338"
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
print "- [ Note ] - Job registered with UUID: [".$job_uuid."], waiting for it to complete.\n";
# Log into the target machine and make sure anvil-daemon is running.
print "- Making sure anvil-daemon is running... ";
my $shell_call = $anvil->data->{path}{exe}{systemctl}." enable --now anvil-daemon.service";
# Make sure VMs are off, DRBD is down and the node is out of the cluster. Call this
# with nohup so it doesn't get killed by the loss of the SSH connection.
my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-stop'}." --no-db".$anvil->Log->switches()." >/dev/null 2>&1 &";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $error, $return_code) = $anvil->Remote->call({
'close' => 1,
no_cache => 1,
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
@ -338,84 +331,115 @@ sub update_nodes
error => $error,
return_code => $return_code,
}});
if (not $return_code)
{
print " running.\n";
}
else
{
print " not running!\n";
}
# Verify that the node is no longer in the cluster.
my $waiting = 1;
my $next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
# Now wait for DRBD resources to stop (which requires VMs be off).
print "- Waiting for all DRBD resource (and the servers using them) to stop before proceeding.\n";
my $wait_until = time + $anvil->data->{switches}{timeout};
my $next_log = time + 60;
my $waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
wait_until => $wait_until,
next_log => $next_log,
waiting => $waiting,
}});
while ($waiting)
{
my $drbd_up = 0;
my $pacemaker_up = 0;
$anvil->DRBD->get_status({
host => $short_host_name,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
# How many resources are up?
my $resource_count = keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_count => $resource_count }});
if ($resource_count)
{
# DRBD is still up.
$drbd_up = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_up => $drbd_up }});
}
# Is pacemaker down?
my $problem = $anvil->Cluster->parse_cib({target => $anvil->data->{peer}{$short_host_name}{access}{ip}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if ($problem)
if (not $problem)
{
# Node is still in the cluster.
$pacemaker_up = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pacemaker_up => $pacemaker_up }});
}
if ((not $pacemaker_up) && (not $drbd_up))
{
# This is good, it didn't parse so it's out of the cluster.
print "- The subnode is out of the node cluster. Proceeding.\n";
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
else
if ($waiting)
{
# Log which resources are still up
if (time > $next_log)
{
$anvil->Job->get_job_details({job_uuid => $job_uuid});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"jobs::job_progress" => $anvil->data->{jobs}{job_progress},
"jobs::job_data" => $anvil->data->{jobs}{job_data},
}});
if ($anvil->data->{jobs}{job_progress} == 0)
my $say_time = $anvil->Get->date_and_time({time_only => 1});
if ($pacemaker_up)
{
print "[ Warning ] - The job has not been picked up yet. Is 'anvil-daemon' running on: [".$short_host_name."]?\n";
print "[ Note ] - [".$say_time."] - The subnode is still in the cluster.\n";
}
else
{
print "[ Note ] - [".$anvil->Get->date_and_time({time_only => 1})."] - The job progress is: [".$anvil->data->{jobs}{job_progress}."], continuing to wait.\n";
print "[ Note ] - [".$say_time."] - The subnode is no longer in the cluster, good.\n";
}
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}})
{
print "[ Note ] - [".$say_time."] - The resource: [".$resource."] is still up.\n";
}
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
$next_log = time + 60;
my $time_left = $wait_until - time;
my $say_time_left = $anvil->Convert->time({
'time' => $time_left,
translate => 1,
long => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
next_log => $next_log,
time_left => $time_left,
say_time_left => $say_time_left,
}});
print "- Waiting for another: [".$say_time_left."], will check again shortly.\n";
}
sleep 5;
if (time > $wait_until)
{
# Timeout.
print "[ Error ] - Timed out while waiting for the subnode: [".$short_host_name."] to stop all DRBD resources and leave the cluster. Aborting the update.\n";
$anvil->nice_exit({exit_code => 1});
}
sleep 10;
}
}
# Record the start time so that we can be sure the subnode has rebooted (uptime is
# less than the current time minus this start time), if the host reboots as part of
# the update.
my $reboot_time = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
reboot_time => $reboot_time,
short_host_name => $short_host_name,
}});
# Do the OS update.
print "- Beginning OS update of: [".$short_host_name."]\n";
my $rebooted = 0;
$shell_call = $anvil->data->{path}{exe}{'anvil-update-system'};
my $update_switches = "";
if ($anvil->data->{switches}{'no-reboot'})
{
$shell_call .= " --no-reboot";
}
if ($anvil->data->{switches}{'clear-cache'})
{
$shell_call .= " --clear-cache";
$update_switches .= " --no-reboot";
}
if ($anvil->data->{switches}{reboot})
{
$shell_call .= " --reboot";
$update_switches .= " --reboot";
}
$shell_call .= $anvil->Log->switches();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update_switches => $update_switches }});
# We register a job, even though anvil-daemon isn't running. This will get picked up
# by 'anvil-update-system --no-db' towards the end of its run.
print "- Registering a job to update the subnode, which we can track to confirm when the update is done.\n";
$shell_call = $anvil->data->{path}{exe}{'anvil-update-system'}.$update_switches.$anvil->Log->switches();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
$job_uuid = "";
$job_uuid = $anvil->Database->insert_or_update_jobs({
my $job_uuid = $anvil->Database->insert_or_update_jobs({
debug => 2,
job_command => $shell_call,
job_description => "job_0468",
@ -427,10 +451,47 @@ sub update_nodes
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
print "- [ Note ] - Job registered with UUID: [".$job_uuid."], waiting for it to complete.\n";
# Now call anvil-update-system with --no-db and background it so we can close
# the DB connection without killing the process.
print "- Calling the no-database update of: [".$short_host_name."]\n";
$shell_call = $anvil->data->{path}{exe}{nohup}." ".$anvil->data->{path}{exe}{'anvil-update-system'}." --no-db".$update_switches;
if ($anvil->data->{switches}{'clear-cache'})
{
# We'll only call clear-cache on this one.
$shell_call .= " --clear-cache";
}
$shell_call .= $anvil->Log->switches()." >/dev/null 2>&1 &";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
# Record the start time so that we can be sure the subnode has rebooted (uptime is
# less than the current time minus this start time), if the host reboots as part of
# the update.
my $rebooted = 0;
my $reboot_time = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
rebooted => $rebooted,
reboot_time => $reboot_time,
short_host_name => $short_host_name,
}});
# Verify that the node is no longer in the cluster.
$waiting = 1;
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
$wait_until = time + $anvil->data->{switches}{timeout};
$waiting = 1;
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
wait_until => $wait_until,
next_log => $next_log,
}});
while ($waiting)
{
$anvil->Job->get_job_details({job_uuid => $job_uuid});
@ -440,7 +501,7 @@ sub update_nodes
}});
if ($anvil->data->{jobs}{job_progress} == 100)
{
print "- Done! The host: [".$short_host_name."] has been updated\n";
print "- Done! The subnode: [".$short_host_name."] has been updated\n";
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
@ -461,18 +522,34 @@ sub update_nodes
}
else
{
my $say_date = $anvil->Get->date_and_time({time_only => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_date => $say_date }});
if (time > $next_log)
{
if ($anvil->data->{jobs}{job_progress} == 0)
{
print "[ Warning ] - The job has not been picked up yet. Is 'anvil-daemon' running on: [".$short_host_name."]?\n";
}
else
print "[ Note ] - [".$say_date."] - The job progress is: [".$anvil->data->{jobs}{job_progress}."], continuing to wait.\n";
if ($anvil->data->{jobs}{job_progress} eq "0")
{
print "[ Note ] - [".$anvil->Get->date_and_time({time_only => 1})."] - The job progress is: [".$anvil->data->{jobs}{job_progress}."], continuing to wait.\n";
print "[ Note ] - [".$say_date."] - It is expected for the job to stay at '0' for a while.\n";
}
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
$next_log = time + 60;
my $time_left = $wait_until - time;
my $say_time_left = $anvil->Convert->time({
'time' => $time_left,
translate => 1,
long => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
next_log => $next_log,
time_left => $time_left,
say_time_left => $say_time_left,
}});
print "- Waiting for another: [".$say_time_left."], will check again shortly.\n";
}
if (time > $wait_until)
{
# Timeout.
print "[ Error ] - Timed out while waiting for the subnode: [".$short_host_name."] to update. Aborting the update.\n";
$anvil->nice_exit({exit_code => 1});
}
sleep 5;
}
@ -488,7 +565,6 @@ sub update_nodes
else
{
print "- Reboot not needed, kernel appears to be up to date.\n";
$run_anvil_safe_start = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { run_anvil_safe_start => $run_anvil_safe_start }});
}
@ -496,15 +572,16 @@ sub update_nodes
# Wait for the node to rejoin the cluster. As before, this is a time
# unrestricted wait loop.
print "- Waiting for the subnode to rejoin the node.\n";
$wait_until = time + $anvil->data->{switches}{timeout};
$waiting = 1;
my $start_called = 0;
$next_log = time + 60;
my $manual_start = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
next_log => $next_log,
manual_start => $manual_start,
's1:wait_until' => $wait_until,
's2:next_log' => $next_log,
's3:manual_start' => $manual_start,
}});
while($waiting)
{
# Should we call a start to the cluster?
@ -512,7 +589,7 @@ sub update_nodes
{
print "- Calling 'anvil-safe-start' to rejoin the subnode to the node.\n";
$start_called = 1;
my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-start'};
my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-start'}.$anvil->Log->switches()." >/dev/null 2>&1 &";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
start_called => $start_called,
shell_call => $shell_call,
@ -659,8 +736,30 @@ sub update_nodes
if (time > $next_log)
{
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
my $say_time = $anvil->Get->date_and_time({time_only => 1});
$next_log = time + 60;
my $time_left = $wait_until - time;
my $say_time_left = $anvil->Convert->time({
'time' => $time_left,
translate => 1,
long => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:say_time' => $say_time,
's2:next_log' => $next_log,
's3:time_left' => $time_left,
's4:say_time_left' => $say_time_left,
}});
# Tell the user we're still waiting.
print "- [".$say_time."] - We're still waiting for the subnode: [".$short_host_name."] to reboot.\n";
print "- Waiting for another: [".$say_time_left."], will check again shortly.\n";
}
if (time > $wait_until)
{
# Timeout.
print "[ Error ] - Timed out while waiting for the subnode: [".$short_host_name."] to join the subcluster. Aborting the update.\n";
$anvil->nice_exit({exit_code => 1});
}
if ($waiting)
@ -707,13 +806,58 @@ sub update_strikers_and_dr
{
my ($anvil) = @_;
foreach my $host_type ("striker", "dr")
# Before we start, set the timeouts.
if ($anvil->data->{switches}{timeout})
{
if ($host_type eq "dr")
if ($anvil->data->{switches}{timeout} =~ /^(\d+)h/i)
{
# Restart daemons.
manage_daemons($anvil, "start");
my $hours = $1;
$anvil->data->{switches}{timeout} = $hours * 3600;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
hours => $hours,
"switches::timeout" => $anvil->data->{switches}{timeout},
}});
}
elsif ($anvil->data->{switches}{timeout} =~ /^(\d+)m/i)
{
my $minutes = $1;
$anvil->data->{switches}{timeout} = $minutes * 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
minutes => $minutes,
"switches::timeout" => $anvil->data->{switches}{timeout},
}});
}
else
{
# Set the default.
print "[ Warning ] - The passed timeout: [".$anvil->data->{switches}{timeout}."] is invalid, setting it to 24 hours.\n";
$anvil->data->{switches}{timeout} = 86400;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"switches::timeout" => $anvil->data->{switches}{timeout},
}});
}
}
else
{
$anvil->data->{switches}{timeout} = 86400;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"switches::timeout" => $anvil->data->{switches}{timeout},
}});
}
# Make sure the timeout, if set, is valid.
if ($anvil->data->{switches}{timeout})
{
if ($anvil->data->{switches}{timeout} =~ /\D/)
{
# Invalid, error out.
print "The --timeout switch was used: [".$anvil->data->{switches}{timeout}."], but the value isn't a number of seconds.\n";
$anvil->nice_exit({exit_code => 1});
}
}
foreach my $host_type ("striker", "dr")
{
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
{
my $host_uuid = $anvil->data->{sys}{hosts}{by_name}{$host_name};
@ -859,20 +1003,102 @@ sub update_strikers_and_dr
{
# Call anvil-update-system and then wait.
print "- Beginning OS update of: [".$short_host_name."]\n";
my $shell_call = $anvil->data->{path}{exe}{'anvil-update-system'};
if ($anvil->data->{switches}{'no-reboot'})
if ($host_type eq "dr")
{
$shell_call .= " --no-reboot";
# Make sure VMs are off and DRBD is down. Call this with nohup so it
# doesn't get killed by the loss of the SSH connection.
my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-stop'}." --no-db".$anvil->Log->switches()." >/dev/null 2>&1 &";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
# Now wait for DRBD resources to stop (which requires VMs be off).
print "- Waiting for all DRBD resource (and the servers using them) to stop before proceeding.\n";
my $wait_until = time + $anvil->data->{switches}{timeout};
my $next_log = time + 60;
my $waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
wait_until => $wait_until,
next_log => $next_log,
waiting => $waiting,
}});
while ($waiting)
{
my $drbd_up = 0;
$anvil->DRBD->get_status({
host => $short_host_name,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
# How many resources are up?
my $resource_count = keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_count => $resource_count }});
if (not $resource_count)
{
# Done!
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
if ($waiting)
{
# Log which resources are still up
if (time > $next_log)
{
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}})
{
print "[ Note ] - [".$anvil->Get->date_and_time({time_only => 1})."] - The resource: [".$resource."] is still up.\n";
}
$next_log = time + 60;
my $time_left = $wait_until - time;
my $say_time_left = $anvil->Convert->time({
'time' => $time_left,
translate => 1,
long => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
next_log => $next_log,
time_left => $time_left,
say_time_left => $say_time_left,
}});
print "- Waiting for another: [".$say_time_left."], will check again shortly.\n";
}
if (time > $wait_until)
{
# Timeout.
print "[ Error ] - Timed out while waiting for the DR host: [".$short_host_name."] to stop all DRBD resources. Aborting the update.\n";
$anvil->nice_exit({exit_code => 1});
}
sleep 10;
}
}
}
if ($anvil->data->{switches}{'clear-cache'})
my $update_switches = "";
if ($anvil->data->{switches}{'no-reboot'})
{
$shell_call .= " --clear-cache";
$update_switches .= " --no-reboot";
}
if ($anvil->data->{switches}{reboot})
{
$shell_call .= " --reboot";
$update_switches .= " --reboot";
}
$shell_call .= $anvil->Log->switches();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update_switches => $update_switches }});
# We register a job, even though anvil-daemon isn't running. This will get
# picked up by 'anvil-update-system --no-db' towards the end of its run.
print "- Registering a job to update the system, which we can track to confirm when the update is done.\n";
my $shell_call = $anvil->data->{path}{exe}{'anvil-update-system'}.$update_switches.$anvil->Log->switches();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my $job_uuid = $anvil->Database->insert_or_update_jobs({
debug => 2,
@ -886,13 +1112,19 @@ sub update_strikers_and_dr
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
print "- [ Note ] - Job registered with UUID: [".$job_uuid."], waiting for it to complete.\n";
# Log into the target machine and make sure anvil-daemon is running.
print "- Making sure anvil-daemon is running... ";
$shell_call = $anvil->data->{path}{exe}{systemctl}." start anvil-daemon.service";
# Now call anvil-update-system with --no-db and background it so we can close
# the DB connection without killing the process.
print "- Calling the no-database update of: [".$short_host_name."]\n";
$shell_call = $anvil->data->{path}{exe}{nohup}." ".$anvil->data->{path}{exe}{'anvil-update-system'}." --no-db".$update_switches;
if ($anvil->data->{switches}{'clear-cache'})
{
# We'll only call clear-cache on this one.
$shell_call .= " --clear-cache";
}
$shell_call .= $anvil->Log->switches()." >/dev/null 2>&1 &";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $error, $return_code) = $anvil->Remote->call({
'close' => 1,
no_cache => 1,
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
@ -901,18 +1133,11 @@ sub update_strikers_and_dr
error => $error,
return_code => $return_code,
}});
if (not $return_code)
{
print " running.\n";
}
else
{
print " not running!\n";
}
# Verify that the node is no longer in the cluster.
my $waiting = 1;
my $next_log = time + 60;
# Verify / wait until the update is done.
my $wait_until = time + $anvil->data->{switches}{timeout};
my $waiting = 1;
my $next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
while ($waiting)
{
@ -946,16 +1171,31 @@ sub update_strikers_and_dr
{
if (time > $next_log)
{
my $say_date = $anvil->Get->date_and_time({time_only => 1});
print "[ Note ] - [".$say_date."] - The job progress is: [".$anvil->data->{jobs}{job_progress}."], continuing to wait.\n";
if ($anvil->data->{jobs}{job_progress} == 0)
{
print "[ Warning ] - The job has not been picked up yet. Is 'anvil-daemon' running on: [".$short_host_name."]?\n";
}
else
{
print "[ Note ] - [".$anvil->Get->date_and_time({time_only => 1})."] - The job progress is: [".$anvil->data->{jobs}{job_progress}."], continuing to wait.\n";
print "[ Note ] - [".$say_date."] - It is normal for the job to show '0' progress until the database access is restored.\n";
}
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
$next_log = time + 60;
my $time_left = $wait_until - time;
my $say_time_left = $anvil->Convert->time({
'time' => $time_left,
translate => 1,
long => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
next_log => $next_log,
time_left => $time_left,
say_time_left => $say_time_left,
}});
print "- Waiting for another: [".$say_time_left."], will check again shortly.\n";
}
if (time > $wait_until)
{
# Timeout.
print "[ Error ] - Timed out while waiting for the machine: [".$short_host_name."] to update the OS. Aborting the update.\n";
$anvil->nice_exit({exit_code => 1});
}
sleep 5;
}
@ -991,6 +1231,8 @@ sub update_strikers_and_dr
else
{
($output, $error, $return_code) = $anvil->Remote->call({
'close' => 1,
no_cache => 1,
shell_call => $shell_call,
target => $anvil->data->{peer}{$short_host_name}{access}{ip},
});
@ -1025,8 +1267,9 @@ sub wait_for_reboot
print "- The target has been rebooted. We'll wait for the target to come back online.\n";
# This is an infinite loop, there is no timeout for this.
my $waiting = 1;
my $next_log = time + 60;
my $wait_until = time + $anvil->data->{switches}{timeout};
my $waiting = 1;
my $next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
while($waiting)
{
@ -1064,10 +1307,30 @@ sub wait_for_reboot
{
if (time > $next_log)
{
my $say_time = $anvil->Get->date_and_time({time_only => 1});
$next_log = time + 60;
my $time_left = $wait_until - time;
my $say_time_left = $anvil->Convert->time({
'time' => $time_left,
translate => 1,
long => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:say_time' => $say_time,
's2:next_log' => $next_log,
's3:time_left' => $time_left,
's4:say_time_left' => $say_time_left,
}});
# Tell the user we're still waiting.
print "- [".$anvil->Get->date_and_time({time_only => 1})."] - We're still waiting for the subnode: [".$short_host_name."] to reboot.\n";
$next_log = time + 60;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { next_log => $next_log }});
print "- [".$say_time."] - We're still waiting for the subnode: [".$short_host_name."] to reboot.\n";
print "- Waiting for another: [".$say_time_left."], will check again shortly.\n";
}
if (time > $wait_until)
{
# Timeout.
print "[ Error ] - Timed out while waiting for the subnode: [".$short_host_name."] to reboot. Aborting the update.\n";
$anvil->nice_exit({exit_code => 1});
}
sleep 5;
@ -1222,14 +1485,17 @@ sub verify_access
$anvil->data->{peer}{$short_host_name}{access}{ip} = "";
$anvil->data->{peer}{$short_host_name}{access}{network} = "";
foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
foreach my $preferred_network ("bcn", "mn", "ifn", "sn", "any")
{
next if $anvil->data->{peer}{$short_host_name}{access}{ip};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { network_name => $network_name }});
next if $network_name !~ /^$preferred_network/;
if (($network_name !~ /^$preferred_network/) && ($preferred_network ne "any"))
{
next;
}
my $target_ip = $anvil->data->{network_access}{$network_name}{target_ip_address};
my $test_access = $anvil->Remote->test_access({

Loading…
Cancel
Save