Merge pull request #361 from ClusterLabs/anvil-tools-dev

Anvil tools dev
main
Digimer 1 year ago committed by GitHub
commit bf288fda49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      Anvil/Tools.pm
  2. 8
      Anvil/Tools/Cluster.pm
  3. 31
      Anvil/Tools/DRBD.pm
  4. 1
      Anvil/Tools/Database.pm
  5. 12
      Anvil/Tools/Get.pm
  6. 23
      Anvil/Tools/Job.pm
  7. 8
      Anvil/Tools/Network.pm
  8. 37
      Anvil/Tools/Remote.pm
  9. 6
      Anvil/Tools/Storage.pm
  10. 6
      man/Makefile.am
  11. 2
      man/anvil-boot-server.8
  12. 45
      man/anvil-manage-power.8
  13. 32
      man/anvil-special-operations.8
  14. 39
      man/anvil-update-system.8
  15. 2
      man/striker-check-machines.8
  16. 45
      man/striker-collect-debug.8
  17. 53
      man/striker-update-cluster.8
  18. 27
      share/words.xml
  19. 5
      tools/Makefile.am
  20. 7
      tools/anvil-daemon
  21. 28
      tools/anvil-manage-dr
  22. 25
      tools/anvil-manage-power
  23. 1312
      tools/anvil-manage-server-storage
  24. 3
      tools/anvil-provision-server
  25. 10
      tools/anvil-safe-stop
  26. 120
      tools/anvil-special-operations
  27. 161
      tools/anvil-update-system
  28. 797
      tools/striker-collect-debug
  29. 1254
      tools/striker-update-cluster

@ -851,9 +851,6 @@ sub _set_defaults
},
};
$anvil->data->{sys} = {
apache => {
user => "admin",
},
daemon => {
dhcpd => "dhcpd.service",
firewalld => "firewalld.service",
@ -1150,6 +1147,7 @@ sub _set_paths
'anvil-safe-start' => "/usr/sbin/anvil-safe-start",
'anvil-safe-stop' => "/usr/sbin/anvil-safe-stop",
'anvil-shutdown-server' => "/usr/sbin/anvil-shutdown-server",
'anvil-special-operations' => "/usr/sbin/anvil-special-operations",
'anvil-sync-shared' => "/usr/sbin/anvil-sync-shared",
'anvil-update-files' => "/usr/sbin/anvil-update-files",
'anvil-update-states' => "/usr/sbin/anvil-update-states",
@ -1261,6 +1259,7 @@ sub _set_paths
'shutdown' => "/usr/sbin/shutdown",
snmpget => "/usr/bin/snmpget",
snmpset => "/usr/bin/snmpset",
'sort' => "/usr/bin/sort",
'ssh-keygen' => "/usr/bin/ssh-keygen",
'ssh-keyscan' => "/usr/bin/ssh-keyscan",
'stat' => "/usr/bin/stat",
@ -1282,6 +1281,8 @@ sub _set_paths
swapon => "/usr/sbin/swapon",
sysctl => "/usr/sbin/sysctl",
systemctl => "/usr/bin/systemctl",
tail => "/usr/bin/tail",
tar => "/usr/bin/tar",
timeout => "/usr/bin/timeout",
touch => "/usr/bin/touch",
tput => "/usr/bin/tput",

@ -2441,7 +2441,7 @@ sub get_peers
=head2 get_primary_host_uuid
This takes an Anvil! UUID and returns with node is currently the "primary" node. That is to say, which node has the most servers running on it, by allocated RAM. For example, if node 1 has two servers, each with 8 GiB of RAN and node 2 has one VM with 32 GiB of RAM, node 2 will be considered primary as it would take longest to migrate servers off.
This takes an Anvil! UUID and returns the host UUID of the node that is currently the "primary" node. That is to say, which node has the most servers running on it, by allocated RAM. For example, if node 1 has two servers, each with 8 GiB of RAM and node 2 has one VM with 32 GiB of RAM, node 2 will be considered primary as it would take longest to migrate servers off.
If all is equal, node 1 is considered primary. If only one node is a cluster member, it is considered primary. If neither node is up, an empty string is returned.
@ -2478,8 +2478,11 @@ sub get_primary_host_uuid
return("");
}
# Get the two node UUIDs.
# Get the two node UUIDs, if not already loaded
if (not exists $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid})
{
$anvil->Database->get_anvils({debug => $debug});
}
if (not exists $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid})
{
@ -2600,6 +2603,7 @@ sub get_primary_host_uuid
my $node2_ram_in_use_by_servers = 0;
# Loop through servers.
$anvil->Database->get_servers({debug => $debug});
foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}})
{
my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid};

@ -2146,6 +2146,10 @@ If any data for the host was stored in a previous call, it will be deleted befor
Parameters;
=head3 host (optional)
By default, the hash key C<< host_name >> listed above is either the local system's short host name, or the C<< target >>. If you'd like to use a specific host name in the hash key, you can use this parameter to set it.
=head3 password (optional)
This is the password to use when connecting to a remote machine. If not set, but C<< target >> is, an attempt to connect without a password will be made.
@ -2172,22 +2176,42 @@ sub get_status
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "DRBD->get_status()" }});
my $host = defined $parameter->{host} ? $parameter->{host} : "";
my $password = defined $parameter->{password} ? $parameter->{password} : "";
my $port = defined $parameter->{port} ? $parameter->{port} : "";
my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root";
my $target = defined $parameter->{target} ? $parameter->{target} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
host => $host,
password => $anvil->Log->is_secure($password),
port => $port,
remote_user => $remote_user,
target => $target,
}});
# If we weren't passed a host, use this machine's short host name.
my $is_local = $anvil->Network->is_local({host => $target});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_local => $is_local }});
if (not $host)
{
# Host not set, set one.
if ($is_local)
{
$host = $anvil->Get->short_host_name();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host => $host }});
}
else
{
# Remote, using the target as the host.
$host = $target;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host => $host }});
}
}
# Is this a local call or a remote call?
my $shell_call = $anvil->data->{path}{exe}{drbdsetup}." status --json";
my $output = "";
my $host = $anvil->Get->short_host_name();
my $is_local = $anvil->Network->is_local({host => $target});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
if ($is_local)
{
# Local.
@ -2200,7 +2224,6 @@ sub get_status
else
{
# Remote call.
$host = $target;
($output, my $error, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->Remote->call({
debug => $debug,
shell_call => $shell_call,
@ -2291,12 +2314,14 @@ sub get_status
$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{congested} = $hash_ref->{connections}->[$i]->{congested};
$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'connection-state'} = $hash_ref->{connections}->[$i]->{'connection-state'};
$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-node-id'} = $hash_ref->{connections}->[$i]->{'peer-node-id'};
$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-role'} = $hash_ref->{connections}->[$i]->{'peer-role'};
$anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'rs-in-flight'} = $hash_ref->{connections}->[$i]->{'rs-in-flight'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"drbd::status::${host}::resource::${resource}::connection::${peer_name}::ap-in-flight" => $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'ap-in-flight'},
"drbd::status::${host}::resource::${resource}::connection::${peer_name}::congested" => $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{congested},
"drbd::status::${host}::resource::${resource}::connection::${peer_name}::connection-state" => $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'connection-state'},
"drbd::status::${host}::resource::${resource}::connection::${peer_name}::peer-node-id" => $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-node-id'},
"drbd::status::${host}::resource::${resource}::connection::${peer_name}::peer-role" => $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'peer-role'},
"drbd::status::${host}::resource::${resource}::connection::${peer_name}::rs-in-flight" => $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer_name}{'rs-in-flight'},
}});

@ -4491,6 +4491,7 @@ WHERE
}
### TODO: Delete this and convert over to Jobs->get_job_details()
=head2 get_job_details
This gets the details for a given job. If the job is found, a hash reference is returned containing the tables that were read in.

@ -161,10 +161,10 @@ sub anvil_from_switch
"switches::anvil_uuid" => $anvil->data->{switches}{anvil_uuid},
}});
}
elsif (exists $anvil->data->{anvils}{anvil_uuid}{$anvil_string})
elsif (exists $anvil->data->{anvils}{anvil_name}{$anvil_string})
{
$anvil->data->{switches}{anvil_name} = $anvil_string;
$anvil->data->{switches}{anvil_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_string}{anvil_uuid};
$anvil->data->{switches}{anvil_uuid} = $anvil->data->{anvils}{anvil_name}{$anvil_string}{anvil_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"switches::anvil_name" => $anvil->data->{switches}{anvil_name},
"switches::anvil_uuid" => $anvil->data->{switches}{anvil_uuid},
@ -326,7 +326,7 @@ sub anvil_version
schema_cache_file => $schema_cache_file,
user => $user,
}});
if ($user eq "apache")
if (($user eq "apache") or ($user eq "striker-ui-api"))
{
# Try to read the local cached version.
if (-e $anvil_cache_file)
@ -1867,8 +1867,8 @@ sub host_uuid
debug => $debug,
file => $anvil->data->{path}{data}{host_uuid},
body => $uuid,
user => "apache",
group => "apache",
user => "striker-ui-api",
group => "striker-ui-api",
mode => "0666",
overwrite => 0,
});
@ -2529,7 +2529,7 @@ sub switches
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { found => $found }});
if (not $found)
{
print "Switch '--".$set_switch." not recognized.\n";
print "Switch '--".$set_switch."' is not recognized.\n";
$problem = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
}

@ -756,29 +756,6 @@ WHERE
$job_status =~ s/message_0058,!!downloaded!.*?!!,!!installed!.*?!!,!!verified!.*?!!,!!lines!.*?!!/message_0058,!!downloaded!$downloaded!!,!!installed!$installed!!,!!verified!$verified!!,!!lines!$lines!!/sm;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "<< job_status" => $job_status }});
}
# This is used by 'anvil-download-file'
if ($job_status =~ /message_0142/gs)
{
### NOTE: Is this needed anymore?
# my $downloaded = $anvil->data->{counts}{downloaded} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{downloaded}}) : 0;
# my $installed = $anvil->data->{counts}{installed} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{installed}}) : 0;
# my $verified = $anvil->data->{counts}{verified} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{verified}}) : 0;
# my $lines = $anvil->data->{counts}{lines} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{lines}}) : 0;
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
# "s1:counts::downloaded" => $anvil->data->{counts}{downloaded},
# "s2:downloaded" => $downloaded,
# "s3:counts::installed" => $anvil->data->{counts}{installed},
# "s4:installed" => $installed,
# "s5:counts::verified" => $anvil->data->{counts}{verified},
# "s6:verified" => $verified,
# "s7:counts::lines" => $anvil->data->{counts}{lines},
# "s8:lines" => $lines,
# }});
#
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ">> job_status" => $job_status }});
# $job_status =~ s/message_0142,!!downloaded!.*?!!,!!installed!.*?!!,!!verified!.*?!!,!!lines!.*?!!/message_0058,!!downloaded!$downloaded!!,!!installed!$installed!!,!!verified!$verified!!,!!lines!$lines!!/sm;
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "<< job_status" => $job_status }});
}
$job_uuid = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,

@ -1212,7 +1212,7 @@ Paramters;
=head3 target (required)
This is the host we're looking for connection options with.
This is the host (name or UUID) we're looking for connection options with.
=cut
sub find_access
@ -1662,6 +1662,12 @@ sub get_company_from_mac
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { company => $company }});
}
if ((not $company) && ($mac =~ /^52:54:00/))
{
$company = "KVM/qemu";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { company => $company }});
}
return($company);
}

@ -302,7 +302,7 @@ sub call
# Now pick up the rest of the variables.
my $close = defined $parameter->{'close'} ? $parameter->{'close'} : 0;
my $no_cache = defined $parameter->{no_cache} ? $parameter->{no_cache} : 0;
my $password = defined $parameter->{password} ? $parameter->{password} : $anvil->data->{sys}{root_password};
my $password = defined $parameter->{password} ? $parameter->{password} : "";
my $secure = defined $parameter->{secure} ? $parameter->{secure} : 0;
my $shell_call = defined $parameter->{shell_call} ? $parameter->{shell_call} : "";
my $timeout = defined $parameter->{timeout} ? $parameter->{timeout} : 10;
@ -311,16 +311,26 @@ sub call
# NOTE: The shell call might contain sensitive data, so we show '--' if 'secure' is set and $anvil->Log->secure is not.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
'close' => $close,
no_cache => $no_cache,
password => $anvil->Log->is_secure($password),
secure => $secure,
shell_call => (not $secure) ? $shell_call : $anvil->Log->is_secure($shell_call),
ssh_fh => $ssh_fh,
start_time => $start_time,
timeout => $timeout,
port => $port,
target => $target,
ssh_fh_key => $ssh_fh_key,
}});
if ((not $password) && (defined $anvil->data->{sys}{root_password}))
{
$password = $anvil->data->{sys}{root_password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
password => $anvil->Log->is_secure($password),
}});
}
# In case 'target' is our short host name, change it to ''.
if ($target eq $anvil->Get->short_host_name())
{
@ -625,6 +635,19 @@ sub call
{
$error = $anvil->Words->string({key => $message_key, variables => $variables});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => $message_key, variables => $variables});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
'close' => $close,
password => $anvil->Log->is_secure($password),
secure => $secure,
shell_call => (not $secure) ? $shell_call : $anvil->Log->is_secure($shell_call),
ssh_fh => $ssh_fh,
start_time => $start_time,
timeout => $timeout,
port => $port,
target => $target,
ssh_fh_key => $ssh_fh_key,
}});
}
}
@ -667,6 +690,10 @@ sub call
error => $ssh_fh->error,
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { error => $error }});
# Close the connection.
$close = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { 'close' => $close }});
}
# Take the last new line off.
@ -914,6 +941,10 @@ This attempts to log into the target to verify that the target is up and reachab
Parameters;
=head3 close (optional, default '1')
If set, the SSH connection used to test the access to the remote host will be closed. This can be useful if there might be a delay between when the connection is tested and when it is used again.
=head3 password (optional)
This is the password used to connect to the remote target as the given user.
@ -941,12 +972,14 @@ sub test_access
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Remote->test_access()" }});
my $close = defined $parameter->{'close'} ? $parameter->{'close'} : 1;
my $password = defined $parameter->{password} ? $parameter->{password} : "";
my $port = defined $parameter->{port} ? $parameter->{port} : 22;
my $target = defined $parameter->{target} ? $parameter->{target} : "";
my $user = defined $parameter->{user} ? $parameter->{user} : getpwuid($<);
my $access = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => {
'close' => $close,
password => $anvil->Log->is_secure($password),
port => $port,
target => $target,
@ -960,7 +993,7 @@ sub test_access
shell_call => $anvil->data->{path}{exe}{echo}." 1",
target => $target,
remote_user => $user,
'close' => 1,
'close' => $close,
no_cache => 1,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {

@ -452,7 +452,7 @@ sub change_mode
This changes the owner and/or group of a file or directory.
$anvil->Storage->change_owner({path => "/tmp/foo", user => "apache", group => "apache" });
$anvil->Storage->change_owner({path => "/tmp/foo", user => "striker-ui-api", group => "striker-ui-api" });
If it fails to write the file, an alert will be logged and 'C<< 1 >>' will be returned. Otherwise, 'C<< 0 >>' will be returned.
@ -4972,11 +4972,11 @@ sub update_config
body => $new_file,
debug => $debug,
file => $anvil->data->{path}{configs}{'anvil.conf'},
group => "apache",
group => "striker-ui-api",
mode => "0640",
overwrite => 1,
secure => 1,
user => "apache",
user => "striker-ui-api",
password => $password,
port => $port,
target => $target,

@ -18,10 +18,14 @@ dist_man8_MANS = \
anvil-manage-dr.8 \
anvil-manage-files.8 \
anvil-manage-keys.1 \
anvil-manage-power.8 \
anvil-manage-server.8 \
anvil-manage-server-storage.8 \
anvil-manage-storage-groups.8 \
anvil-special-operations.8 \
anvil-watch-drbd.8 \
scancore.8 \
striker-check-machines.8 \
striker-initialize-host.8
striker-collect-debug.8 \
striker-initialize-host.8 \
striker-update-cluster.8

@ -40,7 +40,7 @@ This is the server UUID of the server to boot. Generally this isn't needed, exce
\fB\-\-wait\fR
When using '\fB\-\-server\fR all', the request to boot each server will normally not wait for the server to boot. When this is set, this behaviour is changed and the boot will wait before moving on to boot the next server.
.TP
Be away that when this is used, if a server fails to boot, no further servers will be started.
Be aware that when this is used, if a server fails to boot, no further servers will be started.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.

@ -0,0 +1,45 @@
.\" Manpage for the Anvil! power management tool
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-manage-power "8" "July 11 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-manage-power \- This program can power off, reboot, or set a flag indicating one of these actions is required.
.SH SYNOPSIS
.B anvil-manage-power
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program can mark a machine as needing to be powered off or rebooted, or perform those actions directly or as a job.
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-no-wait\fR
.TP
Normally, this program will not reboot a machine until the uptime is over five minutes. This is done to provide a chance for someone to log in and disable anvil-daemon in the case of a reboot loop. This switch prevents waiting for that 5 minute delay.
.TP
\fB\-\-poweroff\fR, \fB\-\-power\-off\fR
.TP
This powers off the host.
.TP
\fB\-\-reboot\fR
.TP
This reboots the host.
.TP
\fB\-\-reboot\-needed\fR [0,1]
.TP
This sets (1) or clears (0) the 'reboot needed' flag for the host system.
.TP
\fB\-\-y\fR, \fB\-\-yes\fR
.TP
If passed, requests to reboot or power off won't ask for confirmation.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -0,0 +1,32 @@
.\" Manpage for the Anvil! special operations tool
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-special-operations "8" "Jun 30 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-special-operations \- This program is generally meant to be used by other programs.
.SH SYNOPSIS
.B anvil-special-operations
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This tool is used, generally by other parts of the Anvil!, to accomplish tasks that generally can't be accomplished by direct system calls. It's a general purpose tool meant to solve specific corner cases.
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-task\fR
This is the task being requested. Current options are:
.IP refresh-drbd-resource
This requires \fB\-\-resource <new name>\fR, and will call 'drbdadm adjust <resource>' as a background task and then return immediately. This is required when adding a new volume to an existing resource as 'drbdadm adjust <res>' will hold until it is called on all active DRBD nodes. This blocks the caller after the first remote host call.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -0,0 +1,39 @@
.\" Manpage for the Anvil! cluster update tool.
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-update-system "8" "July 14 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-update-system \- This program updates the local operating system
.SH SYNOPSIS
.B anvil-update-system
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program updates the local operating system. If the kernel is updated, a reboot will be performed.
.TP
.B Note:
.TP
If the host is an Anvil! subnode, the subnode will be removed from the Anvil! node (and servers migrated off, or, shut down if the peer subnode is offline).
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-clear\-cache\fR
.TP
This will force the dnf cache to be cleared before the OS update is started. This slows the update down a bit, but ensures the latest updates are installed.
.TP
\fB\-\-no\-reboot\fR
.TP
If the kernel is updated, the system will normally be rebooted. This switch prevents the reboot from occurring.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -22,7 +22,7 @@ Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a s
.SS "Commands:"
.TP
This program takes no commands.
.TP
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"

@ -0,0 +1,45 @@
.\" Manpage for the Anvil! debug data collection tool.
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH striker-collect-debug "8" "July 04 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
striker-collect-debug \- This program collects data needed to help diagnose problems with an Anvil! system.
.SH SYNOPSIS
.B striker-collect-debug
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program collects database data, logs, config files and other information needed to help diagnose problems with the Anvil! platform. By default, this collects all data from all accessible machines.
.TP
.B Note:
.TP
This program collects potentially secure information, like passwords. Be careful who you share the collected data with!
.TP
The data from Striker dashboards are always collected.
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-anvil\fR <name or UUID>
.TP
This restricts the data to be collected to the Striker dashboards and the specific Anvil! node pair.
.TP
\fB\-\-hosts\fR <comma-separated list of host names or UUIDs>
.TP
This can be used to specify which specific hosts data is collected from. Note that this can be used in conjunction with \fB\-\-anvil\fR to add additional hosts to collect data from, like DR hosts.
.TP
\fB\-\-output\-file\fR </path/to/file.tar.bz2>
.TP
This allows you to specify the output compressed tarball that the files will be saved in. By default, the output file is \fB/root/anvil-debug_<timestamp>.tar.bz2\fR. If this is a directory (ending in \fB/\fR), the normal file name is created, just in a different directory. If the path ends in a file that doesn't have the \fB.tar.bz2\fR suffix, that suffix will be added automatically. The output file will always be a bzip2-compressed tarball.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -0,0 +1,53 @@
.\" Manpage for the Anvil! cluster update tool.
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH striker-update-cluster "8" "July 11 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
striker-update-cluster \- This program updates all physical machines in an Anvil! cluster
.SH SYNOPSIS
.B striker-update-cluster
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program sequentially updates Striker dashboards, DR hosts and Anvil! nodes (the paired sub-nodes). It does this without needing to take hosted servers offline.
.TP
.B Note:
.TP
This program requires that all machines be online, and that Anvil! nodes be paired and sync'ed. When nodes are updated, the inactive subnode will be removed from the node, updated, rebooted if necessary, and then rejoined to the node. Then hosted servers will migrate to the now-updated subnode, and the process repeated for the other subnode. Anvil! nodes are updated sequentially, so the process can take some time to complete, but should not require a maintenance window.
.TP
The upgrade process will live-migrate all hosted servers! If any hosted server is either under heavy load, or the replication link (the BCN or MN) is relatively lower bandwidth, this could cause performance concerns. As such, it's ideal to run the upgrades at a time less sensitive to performance impacts.
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-clear\-cache\fR
.TP
This will force the dnf cache to be cleared before the OS update is started. This slows the update down a bit, but ensures the latest updates are installed.
.TP
\fB\-\-force\fR
.TP
If any Striker dashboards or DR hosts are unavailable, or if an entire node (paired subnodes) is offline, this switch will allow you to force the upgrade attempt.
.TP
\fB\-y\fR, \fB\-\-yes\fR
.TP
Automatically continue with the upgrade without prompting for confirmation.
.TP
\fB\-\-no\-reboot\fR
.TP
If the kernel is updated on a remote system, the system will normally be rebooted. This switch prevents the reboot from occurring.
.TP
\fB\-\-reboot\-self\fR
.TP
By default, if the local system needs to be updated, a message is printed but the local system is NOT rebooted. This switch will instead cause this host to reboot at the end of the cluster update.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -602,6 +602,11 @@ The error was:
</key>
<key name="error_0417">There was a problem with finding a common storage network between: [#!variable!node1_name!#] and: [#!variable!node2_name!#]. Found node 1 to have the IP: [#!variable!node1_ip!#] and node 2: [#!variable!node2_ip!#]. Is there a problem with '/etc/hosts'?</key>
<key name="error_0418">Failed to find a network to use for storage replication. Is there a problem with '/etc/hosts'?</key>
<key name="error_0419"><![CDATA[[ Error ] - The resource to refresh must be provided with '--resource <res>'.]]></key>
<key name="error_0420">Failed to withdraw the subnode from the node's cluster. Expected the 'anvil-safe-stop' call to return '0', but got: [#!variable!return_code!#]. The output, if anything, was:
========
#!variable!output!#
========</key>
<!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which lines are translatable -->
@ -1552,6 +1557,12 @@ Note: This is a permanent action! If you protect this server again later, a full
<key name="job_0462"><![CDATA[ --driver-disc - (optional) A driver disc to be added as a second optical drive. Valid options are above.]]></key>
<key name="job_0463">Enabling the enable-safe-start daemon.</key>
<key name="job_0464">Calling select ScanCore scan agents to ensure the database is updated.</key>
<key name="job_0465">Reload (adjust) a DRBD resource</key>
<key name="job_0466">This job is to reload (adjust) a DRBD resource. It's run as a job as it blocks until the adjust is run on all nodes.</key>
<key name="job_0467">Update the base operating system.</key>
<key name="job_0468">This uses 'dnf' to do an OS update on the host. If this is run on a node, 'anvil-safe-stop' will be called to withdraw the subnode from the node's cluster. If the peer subnode is also offline, hosted servers will be shut down.</key>
<key name="job_0469">Update beginning. Verifying all known machines are accessible...</key>
<key name="job_0470"></key>
<!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key>
@ -2405,7 +2416,10 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="message_0005">There was an unknown error while connecting as: [#!variable!user!#] to: [#!variable!remote_user!#@#!variable!target!#]. The error was: [#!variable!error!#]</key>
<key name="message_0006">We were unable to log in to: [#!variable!connection!#]. Please check that the password is correct or that passwordless SSH is configured properly.</key>
<key name="message_0007">An SSH session was successfully opened to: [#!variable!target!#].</key>
<key name="message_0008">The remote shell call: [#!variable!shell_call!#] to: [#!variable!connection!#] failed with the error: [#!variable!error!#].</key>
<key name="message_0008">The remote shell call: [#!variable!shell_call!#] to: [#!variable!connection!#] failed with the error:
====
#!variable!error!#
====</key>
<key name="message_0009">The SSH session to: [#!variable!target!#] was successfully closed.</key>
<key name="message_0010">The SSH session to: [#!variable!target!#] was closed because 'no_cache' was set and there was an open SSH connection.</key>
<key name="message_0011">Wrote the system UUID to the file: [#!variable!file!#] to enable the web based tools to read this system's UUID.</key>
@ -2893,6 +2907,15 @@ Proceed? [y/N]</key>
<key name="message_0308">The DRBD config file was not found. A protect job needs to be run from the Anvil! node hosting the server to be protected.</key>
<key name="message_0309">Waiting a bit to make sure the file: [#!variable!file!#] is done uploading...</key>
<key name="message_0310">Upload complete.</key>
<key name="message_0311">Picked up the special operation job.</key>
<key name="message_0312">Reloading (adjusting) the DRBD resource: [#!variable!resource!#]. This will not complete until all peers have also reloaded this resource.</key>
<key name="message_0313">DRBD resource: [#!variable!resource!#] has been reloaded.</key>
<key name="message_0314">Checking if the subnode is out of the node's cluster before updating the OS.</key>
<key name="message_0315">The subnode is in the node's cluster, asking it to withdraw. This could take some time if servers need to be migrated.</key>
<key name="message_0316">Cleared 'dnf' cache.</key>
<key name="message_0317">The kernel was updated, so a reboot is required. Rebooting now.</key>
<key name="message_0318">Registering a job to reboot this host.</key>
<key name="message_0319">Preparing to update the entire Anvil! cluster.</key>
<!-- Translate names (protocols, etc) -->
<key name="name_0001">Normal Password</key> <!-- none in mail-server -->
@ -3590,7 +3613,7 @@ We will sleep a bit and try again.
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
<key name="warning_0139">[ Warning ] - We're using: [#!variable!ram_used!#] (#!variable!ram_used_bytes!# Bytes). but there is a job: [#!variable!job_command!#] is runnng, which might be why the RAM is high. NOT exiting while this program is running.</key>
<key name="warning_0139">[ Warning ] - We're using: [#!variable!ram_used!#] (#!variable!ram_used_bytes!# Bytes), but the job: [#!variable!job_command!#] is running (progress is: [#!variable!job_progress!#]), which might be why the RAM is high. NOT exiting while this program is running.</key>
<key name="warning_0140">[ Warning ] - A no-longer active PID: [#!variable!pid!#] (used by: [#!variable!caller!#]) had marked the database: [#!variable!db!#] as "in_use", but the PID is gone now. Reaping the flag.</key>
<key name="warning_0141">[ Warning ] - We waited for: [#!variable!wait_time!#] seconds for all users of the local database to exit. Giving up waiting and taking the database down now.</key>
<key name="warning_0142">[ Warning ] - The command: [#!variable!command!#] is still using our database.</key>

@ -37,6 +37,7 @@ dist_sbin_SCRIPTS = \
anvil-scan-network \
anvil-show-local-ips \
anvil-shutdown-server \
anvil-special-operations \
anvil-sync-shared \
anvil-test-alerts \
anvil-update-definition \
@ -51,6 +52,7 @@ dist_sbin_SCRIPTS = \
striker-auto-initialize-all \
striker-boot-machine \
striker-check-machines \
striker-collect-debug \
striker-db-report \
striker-db-status \
striker-file-manager \
@ -65,7 +67,8 @@ dist_sbin_SCRIPTS = \
striker-prep-database \
striker-purge-target \
striker-scan-network \
striker-show-db-counts
striker-show-db-counts \
striker-update-cluster
fencedir = ${FASEXECPREFIX}/sbin

@ -252,8 +252,8 @@ sub check_ram
}});
if ($problem)
{
# See if an 'anvil-sync-shared' job is running and, if so, don't exit. The file copy is
# counted and not an actual problem.
# See if any jobs are running, and if so, hold because those jobs might be doing things (like
# OS updates or file syncs) that could make anvil-daemon appear to be using more memory.
$anvil->Database->get_jobs({debug => 2});
foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}})
{
@ -264,11 +264,12 @@ sub check_ram
job_progress => $job_progress,
}});
if (($job_progress != 100) && ($job_command =~ /anvil-sync-shared/))
if (($job_progress != 100) && ($job_progress != 0))
{
# Don't abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0139", variables => {
job_command => $job_command,
job_progress => $job_progress,
ram_used => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
}});

@ -383,6 +383,7 @@ sub sanity_check
{
# Is this server configured to be protected?
my $config_file = $anvil->data->{path}{directories}{drbd_resources}."/".$server_name.".res";
$config_file =~ s/\/\//\//g;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { config_file => $config_file }});
if (not -e $config_file)
{
@ -398,9 +399,9 @@ sub sanity_check
variables => $variables,
job_status => "failed",
});
}
$anvil->nice_exit({exit_code => 1});
}
}
# If we're doing a --protect or --remove, make sure we're a node, the cluster is up, and both nodes
# are ready.
@ -533,7 +534,6 @@ sub sanity_check
# Get the Anvil! details.
$anvil->Database->get_hosts();
$anvil->Database->get_anvils();
$anvil->Database->get_storage_group_data({debug => 2});
$anvil->Database->get_dr_links({debug => 2});
@ -559,7 +559,9 @@ sub sanity_check
}
}
# If I don't have a dr_host_uuid yet, see which are available. If only one, use it. If two or more, tell the user they need to specify which.
# If I don't have a dr_host_uuid yet, see which are available. If only one, use it. If two or more,
# and if the server is already being protected, determine which to use from its config. Otherwise,
# tell the user they need to specify which.
if (not $dr_host_uuid)
{
my $dr_count = keys %{$anvil->data->{dr_links}{by_anvil_uuid}{$anvil_uuid}{dr_link_host_name}};
@ -587,6 +589,26 @@ sub sanity_check
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dr_host_uuid => $dr_host_uuid }});
}
}
else
{
# Two or more. Is the server already protected? If so, try to find which DR it's
# using.
if (($anvil->data->{switches}{'connect'}) or ($anvil->data->{switches}{'disconnect'}))
{
# Read the config.
my $config_file = $anvil->data->{path}{directories}{drbd_resources}."/".$server_name.".res";
$config_file =~ s/\/\//\//g;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { config_file => $config_file }});
my $resource_config = $anvil->Storage->read_file({file => $config_file});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_config => $resource_config }});
foreach my $line (split/\n/, $resource_config)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lineg => $line }});
}
}
}
}
# If I still don't have a DR host, fail out.

@ -34,17 +34,15 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
my $anvil = Anvil::Tools->new();
# Read switches
$anvil->data->{switches}{'poweroff'} = "";
$anvil->data->{switches}{'power-off'} = "";
$anvil->data->{switches}{'reboot'} = "";
$anvil->data->{switches}{'y'} = "";
$anvil->data->{switches}{'yes'} = "";
$anvil->data->{switches}{'reboot-needed'} = "";
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{'no-delay'} = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Read the command line switches.
$anvil->Get->switches({list => [
"no-wait",
"power-off",
"poweroff",
"reboot",
"reboot-needed"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
if ($anvil->data->{switches}{'power-off'})
{
@ -188,10 +186,13 @@ sub do_poweroff
my ($anvil, $task) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { task => $task }});
# In case we're being called by another job, we'll sleep for a few second to let those close out.
sleep 3;
# We'll wait until the system has at least 5 minutes of uptime, unless '--no-wait' was given.
my $uptime = $anvil->data->{switches}{'no-wait'} ? 0 : $anvil->Get->uptime;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"switches::no-wait" => $anvil->data->{switches}{'no-delay'},
"switches::no-wait" => $anvil->data->{switches}{'no-wait'},
uptime => $uptime,
}});

File diff suppressed because it is too large Load Diff

@ -848,7 +848,8 @@ sub startup_resource
my $short_host_name = $anvil->data->{job}{short_host_name};
my $role = defined $anvil->data->{drbd}{status}{$short_host_name}{resource}{$server}{role} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$server}{role} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'job::server' => $anvil->data->{job}{server_name},
server => $server,
short_host_name => $short_host_name,
role => $role,
}});

@ -274,9 +274,9 @@ sub process_servers
's2:progress_steps' => $progress_steps,
}});
# If we have one or more local servers, we need to know if both of us are in the cluster. If we're
# not, or the peer isn't, we can't migrate.
my $can_migrate = 0;
# If we have one or more local servers, we need to know if both subnodes are in the node's cluster.
# If we're not, or the peer isn't, we can't migrate.
my $can_migrate = 1;
if ($server_count)
{
my $problem = $anvil->Cluster->parse_cib({debug => 2});
@ -287,18 +287,20 @@ sub process_servers
}});
if ($problem)
{
# We're not in the node's cluster, we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
elsif ((not $anvil->data->{cib}{parsed}{'local'}{ready}) or (not $anvil->data->{cib}{parsed}{peer}{ready}))
{
# One of the subnodes is not in the cluster, so we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
if ((not $anvil->data->{switches}{'stop-servers'}) && (not $can_migrate))
{
# Abort.
# We would have to stop the servers, and the user didn't tell us to do that, abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0372"});
$anvil->Job->update_progress({progress => 100, message => "error_0372"});
$anvil->nice_exit({exit_code => 1});

@ -0,0 +1,120 @@
#!/usr/bin/perl
#
# This program has no specific purpose. It's a general program for performing certain special tasks that
# can't be done otherwise in a reliable or efficient way.
#
# Supported tasks (selected with '--task');
# - refresh-drbd-resource; Runs 'drbdadm adjust' against the resource named by '--resource'.
#
# Exit codes;
# 0 = Normal exit.
# 1 = No database connection, or the 'refresh-drbd-resource' task was called without '--resource'.
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Text::Diff;
use Data::Dumper;
# Work out this program's name and the directory it was called from, using '$0'.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
# If the directory starts with '.', swap the leading '.' for the current working directory.
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off output buffering so that printed messages are shown right away.
$| = 1;
my $anvil = Anvil::Tools->new();
# Read the command line switches; '--task' selects the operation to perform and '--resource' names the DRBD
# resource to act on.
$anvil->Get->switches({list => [
"task",
"resource",
], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Connect to the database(s). We can't proceed without at least one connection.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases; log the error, sleep for a bit and then exit with code '1'. Presumably the daemon
# will pick the job up and try again after we exit - TODO confirm.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0306"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
# If we were given a job UUID, load the job's details and record that this process ('$$' is our PID) picked
# the job up just now.
if ($anvil->data->{switches}{'job-uuid'})
{
$anvil->Job->clear();
$anvil->Job->get_job_details({debug => 2});
$anvil->Job->update_progress({
progress => 1,
job_picked_up_by => $$,
job_picked_up_at => time,
message => "message_0311",
});
}
# Run the requested task. Any other '--task' value falls through to the normal exit below.
if ($anvil->data->{switches}{task} eq "refresh-drbd-resource")
{
refresh_drbd_resource($anvil);
}
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# Runs 'drbdadm adjust <resource>' for the resource named by the '--resource' switch. The command is started
# with 'background => 1' because, when new volumes are added to an existing resource, 'drbdadm adjust' won't
# return until it has been run on all hosts.
#
# Parameters;
# anvil - The Anvil::Tools object.
#
# In normal use this does not return to the caller; it calls 'nice_exit' with code '0' once the adjust has
# been started, or with code '1' if no resource was given.
sub refresh_drbd_resource
{
	my ($anvil) = @_;

	my $target_resource = $anvil->data->{switches}{resource};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $target_resource }});

	# Nothing can be adjusted without a resource name, so mark the job as failed and bail out.
	if (!$target_resource)
	{
		my $failure = {
			progress   => 100,
			message    => "error_0419",
			job_status => "failed",
		};
		$anvil->Job->update_progress($failure);
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0419"});
		$anvil->nice_exit({exit_code => 1});
	}

	# Tell the job and the log that the adjust is about to start.
	my $starting_message = "message_0312,!!resource!".$target_resource."!!";
	$anvil->Job->update_progress({progress => 10, message => $starting_message});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "message_0312", variables => { resource => $target_resource }});

	# Build the call and start it in the background.
	my $adjust_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$target_resource;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $adjust_call }});
	my ($call_output, $call_return_code) = $anvil->System->call({
		background => 1,
		shell_call => $adjust_call,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $call_output,
		return_code => $call_return_code,
	}});

	# Close out the job and record that we're done.
	my $finished_message = "message_0313,!!resource!".$target_resource."!!";
	$anvil->Job->update_progress({progress => 100, message => $finished_message});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "message_0313", variables => { resource => $target_resource }});

	$anvil->nice_exit({exit_code => 0});

	return(0);
}

@ -15,6 +15,7 @@
# 1 = No database connections available.
# 2 = The job UUID was passed, but it wasn't valid.
# 3 = It looks like the update failed, reset progress to '0'.
# 4 = Failed to withdraw the node from the cluster.
#
# TODO:
# - Rebuild this to be 'striker-update-system' and have it update local strikers and all nodes.
@ -36,10 +37,12 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
my $anvil = Anvil::Tools->new();
# Read switches
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Read the command line switches.
$anvil->Get->switches({list => [
"clear-cache",
"no-reboot"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Log that we've started.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
@ -90,7 +93,49 @@ my $reboot_needed = $anvil->System->reboot_needed({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
if ($reboot_needed)
{
if (not $anvil->data->{switches}{'no-reboot'})
{
# Clear maintenance mode.
$anvil->System->maintenance_mode({set => 0});
# Record that we're rebooting so that 'striker-update-cluster' knows to wait for a reboot.
if ($anvil->data->{switches}{'job-uuid'})
{
my $query = "
UPDATE
jobs
SET
job_data = 'rebooted',
modified_date = ".$anvil->Database->quote($anvil->Database->refresh_timestamp)."
WHERE
job_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'job-uuid'})."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
# Register a job to reboot.
update_progress($anvil, 98, "message_0318");
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'anvil-manage-power'}." --reboot -y".$anvil->Log->switches,
job_data => "",
job_name => "reboot::system",
job_title => "job_0009",
job_description => "job_0006",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# Record that we're going to reboot now.
update_progress($anvil, 100, "message_0317");
}
else
{
# Record that a reboot is needed.
update_progress($anvil, 100, "message_0039");
}
}
else
{
@ -140,17 +185,82 @@ sub run_os_update
# This needs to be set to avoid warnings when called without a job-uuid.
$anvil->data->{sys}{last_update} = 0;
# Make sure that, if we're a node, we're out of the cluster.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
if ($host_type eq "node")
{
# Call anvil-safe-stop
update_progress($anvil, 3, "message_0314");
my $problem = $anvil->Cluster->parse_cib({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
if (not $problem)
{
# Call anvil-safe-stop
update_progress($anvil, 4, "message_0315");
my $shell_call = $anvil->data->{path}{exe}{'anvil-safe-stop'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
if ($return_code)
{
# Something went wrong, abort.
update_progress($anvil, 100, "error_0420,!!return_code!".$return_code."!!,!!output!".$output."!!");
# Set the job_data to 'failed' so that striker-update-cluster' knows to abort.
if ($anvil->data->{switches}{'job-uuid'})
{
my $query = "
UPDATE
jobs
SET
job_data = 'failed',
modified_date = ".$anvil->Database->quote($anvil->Database->refresh_timestamp)."
WHERE
job_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'job-uuid'})."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0035", variables => { output => $output } });
$anvil->nice_exit({exit_code => 4});
}
}
}
# Should we clear the cache?
if ($anvil->data->{switches}{'clear-cache'})
{
# Yes.
my $shell_call = $anvil->data->{path}{exe}{dnf}." clean all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
update_progress($anvil, 5, "message_0316");
}
# NOTE: We run this directly to better monitor progress and update the progress.
my $transaction_shown = 0;
my $success = 0;
my $to_update = 0;
my $percent_step = 0;
my $progress = 5;
my $progress = 6;
my $counted_lines = 0;
my $next_step = 0;
my $verifying = 0;
my $output = "";
my $shell_call = $anvil->data->{path}{exe}{dnf}." clean expire-cache && ".$anvil->data->{path}{exe}{dnf}." -y update --best --allowerasing; ".$anvil->data->{path}{exe}{echo}." return_code:\$?";
my $shell_call = $anvil->data->{path}{exe}{dnf}." -y update; ".$anvil->data->{path}{exe}{echo}." return_code:\$?";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});
open (my $file_handle, $shell_call." 2>&1 |") or $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
while(<$file_handle>)
@ -162,14 +272,6 @@ sub run_os_update
$anvil->data->{counts}{lines}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "counts::lines" => $anvil->data->{counts}{lines}, line => $line }});
if ($line =~ /^kernel /)
{
# Reboot will be needed.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { reboot_needed => $reboot_needed }});
}
# If there were no updates, let the user know.
if ($line =~ /^Nothing to do/i)
{
@ -286,6 +388,37 @@ sub run_os_update
my ($systemctl_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{systemctl}." daemon-reload", source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { systemctl_output => $systemctl_output, return_code => $return_code }});
### See if the kernel has been updated.
# Get the newest installed kernel
$shell_call = $anvil->data->{path}{exe}{rpm}." -q kernel | ".$anvil->data->{path}{exe}{'sort'}." | ".$anvil->data->{path}{exe}{tail}." -n 1";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
(my $installed_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
installed_kernel => $installed_kernel,
return_code => $return_code,
}});
$installed_kernel =~ s/^kernel-(\d+.\d+\.\d+-\d+)\..*$/$1/;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { installed_kernel => $installed_kernel }});
# Get the running kernel
$shell_call = $anvil->data->{path}{exe}{uname}." -r";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
(my $active_kernel, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
active_kernel => $active_kernel,
return_code => $return_code,
}});
$active_kernel =~ s/(\d+.\d+\.\d+-\d+)\..*$/$1/;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { active_kernel => $active_kernel }});
if ($installed_kernel ne $active_kernel)
{
# Reboot needed
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0690!#" }});
my $reboot_needed = $anvil->System->reboot_needed({set => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
}
# Did it work?
if (not $success)
{

@ -0,0 +1,797 @@
#!/usr/bin/perl
#
# This program will collect data from all accessible machines and compile it into a common tarball. This is
# designed to make it easier to diagnose faults.
#
# Exit codes;
# 0 = Normal exit.
# 1 = No database connection.
#
# TODO:
#
# USAGE:
#
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Term::Cap;
use Text::Diff;
use Data::Dumper;
# Work out this program's name and the directory it was called from, using '$0'.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();

# Read the command line switches.
$anvil->Get->switches({list => [
	"anvil",
	"hosts",
	"output-file"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Connect to the database(s). We can't proceed without at least one connection.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases; log the error, sleep for a bit and then exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0305"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're a striker.
if ($anvil->Get->host_type ne "striker")
{
	print "This has to be run on a Striker dashboard.\n";
	$anvil->nice_exit({exit_code => 1});
}

# Work out where the collected data will be staged before it is put into the tarball. The directory itself
# is created later, in collect_data().
$anvil->data->{sys}{date_and_time}     = $anvil->Get->date_and_time({file_name => 1});
$anvil->data->{sys}{compile_directory} = "/tmp/anvil-debug_".$anvil->data->{sys}{date_and_time};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	"sys::date_and_time"     => $anvil->data->{sys}{date_and_time},
	"sys::compile_directory" => $anvil->data->{sys}{compile_directory},
}});

print "Data collection has begun.\n";
print "Depending on how many systems we're collecting from, this could take a while.\n";

# Work out the full path to the tarball, and make sure the directory it will be written to exists.
my $tarball = process_output($anvil);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});

process_switches($anvil);

collect_data($anvil);

# Create the tarball now.
print "Data collection complete, creating the tarball now... ";
my $shell_call = $anvil->data->{path}{exe}{tar}." -cvjf ".$tarball." ".$anvil->data->{sys}{compile_directory};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	's1:tarball'    => $tarball,
	's2:shell_call' => $shell_call,
}});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	output      => $output,
	return_code => $return_code,
}});

# Don't claim success if the archive was not actually written.
if (not -e $tarball)
{
	my $shown_return_code = defined $return_code ? $return_code : "";
	print "Failed!\n";
	print "[ Error ] - Failed to create the tarball: [".$tarball."], the return code was: [".$shown_return_code."]. The error should be logged.\n";
	$anvil->nice_exit({exit_code => 1});
}
print "Done!\n";

# A non-zero return code with the archive present is reported as a warning only, as the archive may still
# be usable.
if ($return_code)
{
	print "[ Warning ] - The 'tar' call exited with the return code: [".$return_code."], the tarball may be incomplete.\n";
}

print "\n[ Complete ] - The debug data is here: [".$tarball."]\n";
print "[ Warning ] - The collected logs likely include sensitive information! Share it carefully!\n";

$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# Works out the full path of the tarball to create and makes sure the directory it goes in exists.
#
# By default, the tarball is '/root/anvil-debug_<sys::date_and_time>.tar.bz2'. If '--output-file' was given,
# it must start with '/' and it is used instead. If it ends in '/', the default file name is used in that
# directory. If the file name doesn't end in '.tar.bz2', that suffix is appended.
#
# Parameters;
# anvil - The Anvil::Tools object.
#
# Returns the full path to the tarball. Exits with code '1' if '--output-file' doesn't start with '/', or if
# the output directory can't be created.
sub process_output
{
	my ($anvil) = @_;

	my $tarball = "/root/anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
	if ($anvil->data->{switches}{'output-file'})
	{
		my $requested_path = $anvil->data->{switches}{'output-file'};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { requested_path => $requested_path }});
		if ($requested_path !~ /^\//)
		{
			print "[ Error ] - The output path needs to be a path.\n";
			$anvil->nice_exit({exit_code => 1});
		}
		else
		{
			# Use the requested path; the file name and suffix are sorted out below.
			$tarball = $requested_path;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});
		}
	}

	# Split the path at the last '/'. This is done in one match, so the file name is never interpolated
	# into a regular expression (a name like 'c++.tar.bz2' would otherwise be read as a pattern).
	my ($output_directory, $output_file) = ($tarball =~ /^(.*)\/([^\/]*)$/);
	$output_directory = "" if not defined $output_directory;
	$output_file      = "" if not defined $output_file;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output_file      => $output_file,
		output_directory => $output_directory,
	}});

	if (not $output_file)
	{
		# Only a directory was given, use the default file name.
		$output_file = "anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output_file => $output_file }});
	}
	elsif ($output_file !~ /\.tar\.bz2$/)
	{
		# The name doesn't end with the expected suffix, so append it.
		$output_file .= ".tar.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output_file => $output_file }});
	}

	# A file directly under '/' leaves the directory part empty; there is nothing to create in that case.
	if (($output_directory ne "") && ($output_directory ne "/"))
	{
		print "- Creating the output directory: [".$output_directory."]... ";
		my $failed = $anvil->Storage->make_directory({directory => $output_directory});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
		if ($failed)
		{
			print "Failed!\nUnable to create the directory: [".$output_directory."]. The error should be logged.\n";
			$anvil->nice_exit({exit_code => 1});
		}
	}

	$tarball = $output_directory."/".$output_file;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});

	return($tarball);
}
# Creates the compile directory, then gathers data from each known host into a per-host sub-directory.
#
# Hosts are read from 'sys::hosts::by_name' and handled grouped by type; strikers first, then nodes, then DR
# hosts. When 'collect_from' holds host UUIDs, only those hosts are processed. The local host is handled by
# collect_local_data() and all others by collect_remote_data(). If the latter reports a problem, a
# 'no_access.txt' file is written into that host's directory.
#
# Parameters;
# anvil - The Anvil::Tools object.
#
# Returns '0'. Exits with code '1' if a directory can't be created.
sub collect_data
{
	my ($anvil) = @_;

	# The staging directory has to exist before anything can be collected into it.
	my $failed = $anvil->Storage->make_directory({directory => $anvil->data->{sys}{compile_directory}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
	if ($failed)
	{
		print "Failed to create the directory: [".$anvil->data->{sys}{compile_directory}."]. The error should be logged.\n";
		$anvil->nice_exit({exit_code => 1});
	}

	# A non-zero count means we're only collecting from a subset of the hosts.
	my $subset_count = scalar(@{$anvil->data->{collect_from}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts => $subset_count }});

	foreach my $wanted_type ("striker", "node", "dr")
	{
		HOST: foreach my $host_name (sort keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid   = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $host_record = $anvil->data->{hosts}{host_uuid}{$host_uuid};
			my $short_name  = $host_record->{short_host_name};
			my $actual_type = $host_record->{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:host_name'       => $host_name,
				's2:host_uuid'       => $host_uuid,
				's3:short_host_name' => $short_name,
				's4:this_host_type'  => $actual_type,
			}});
			next HOST if $wanted_type ne $actual_type;

			# When limited to a subset, skip any host that isn't in the requested list.
			if ($subset_count)
			{
				my $wanted = 0;
				CANDIDATE: foreach my $candidate_uuid (@{$anvil->data->{collect_from}})
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						host_uuid      => $host_uuid,
						this_host_uuid => $candidate_uuid,
					}});
					next CANDIDATE if $candidate_uuid ne $host_uuid;

					$wanted = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { found => $wanted }});
					last CANDIDATE;
				}
				next HOST if not $wanted;
			}

			# Each host gets its own directory under the compile directory.
			my $target_directory = $anvil->data->{sys}{compile_directory}."/".$short_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target_directory => $target_directory }});
			unless (-d $target_directory)
			{
				my $failed = $anvil->Storage->make_directory({
					directory => $target_directory,
					mode      => "777",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
				if ($failed)
				{
					print "Failed to create the directory: [".$target_directory."]. The error should be logged.\n";
					$anvil->nice_exit({exit_code => 1});
				}
			}

			# The local host is read directly, nothing more to do for it after that.
			if ($host_uuid eq $anvil->Get->host_uuid)
			{
				collect_local_data($anvil, $target_directory);
				next HOST;
			}

			# Anything else is a remote machine. If it can't be collected from, leave a note saying so
			# in its directory.
			my $problem = collect_remote_data($anvil, $host_uuid, $target_directory);
			if ($problem)
			{
				$anvil->Storage->write_file({
					file      => $target_directory."/no_access.txt",
					body      => "No access to: [".$host_name."] found.\n",
					overwrite => 1,
					backup    => 0,
				});
			}
		}
	}

	return(0);
}
# Collects logs and configuration from a remote host into '$target_directory'.
#
# The steps are; find a network we can reach the host over, have the host write its previous and current
# boot journals to its '/tmp' and copy them here, dump the 'anvil' database if the host is a Striker (and we
# have its database access data), copy '/etc/hosts' and '/var/log/anvil.log', copy the CIB if the host is a
# node, and copy the server definitions and DRBD config if the host is not a Striker.
#
# Parameters;
# - $anvil            - The Anvil::Tools handle.
# - $host_uuid        - UUID of the remote host, used to look up 'hosts::host_uuid::<uuid>::...'.
# - $target_directory - Local directory the collected files are written into.
#
# Returns '0' once collection has run, or '1' if no network access to the host was found. Calls 'nice_exit'
# with exit code '1' if the remote database dump returns a non-zero return code.
sub collect_remote_data
{
	my ($anvil, $host_uuid, $target_directory) = @_;
	
	my $host_name       = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name};
	my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $this_host_type  = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	my $failed_body     = "File not copied from: [".$host_name."].\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		's1:host_name'        => $host_name,
		's2:host_uuid'        => $host_uuid,
		's3:short_host_name'  => $short_host_name,
		's4:this_host_type'   => $this_host_type,
		's5:target_directory' => $target_directory,
	}});
	
	print "\nGrabbing logs and data from the remote system: [".$short_host_name."].\n";
	print "- Testing access...\n";
	my $matches = $anvil->Network->find_access({
		debug  => 2,
		target => $host_name,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
	
	# Walk the networks in order of preference and stop at the first one that gives us access. Without
	# the 'last', later (less preferred) networks would overwrite the earlier match.
	$anvil->data->{peer}{$short_host_name}{access}{ip}      = "";
	$anvil->data->{peer}{$short_host_name}{access}{network} = "";
	NETWORK: foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
		foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
		{
			next if $network_name !~ /^$preferred_network/;
			
			my $target_ip   = $anvil->data->{network_access}{$network_name}{target_ip_address};
			my $test_access = $anvil->Remote->test_access({target => $target_ip});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:network_name' => $network_name,
				's2:target_ip'    => $target_ip,
				's3:test_access'  => $test_access,
			}});
			next if not $test_access;
			
			# We're good.
			print "- Found access over the network: [".$network_name."] using the target IP: [".$target_ip."]\n";
			$anvil->data->{peer}{$short_host_name}{access}{ip}      = $target_ip;
			$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				"s1:peer::${short_host_name}::access::ip"      => $anvil->data->{peer}{$short_host_name}{access}{ip},
				"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network},
			}});
			last NETWORK;
		}
	}
	
	my $access_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
	if (not $access_ip)
	{
		print "No access!!\n";
		print "- Not able to collect data from this host, skipping.\n";
		return(1);
	}
	
	# Have the remote host dump the previous boot's journal, then copy it here.
	print "- Writing out system logs from the previous boot... ";
	my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > /tmp/journalctl-previous-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $error, $return_code) = $anvil->Remote->call({
		shell_call => $shell_call,
		target     => $access_ip,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		error       => $error,
		return_code => $return_code,
	}});
	print "Done! Copying to here... ";
	_collect_remote_file($anvil, $access_ip, "/tmp/journalctl-previous-boot.log", $target_directory, $failed_body);
	
	# Same for the current boot. This must run on the remote host, as the file is copied from its '/tmp'.
	print "- Grabbing system logs from this boot... ";
	$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $error, $return_code) = $anvil->Remote->call({
		shell_call => $shell_call,
		target     => $access_ip,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		error       => $error,
		return_code => $return_code,
	}});
	print "Done! Copying to here... ";
	_collect_remote_file($anvil, $access_ip, "/tmp/journalctl-current-boot.log", $target_directory, $failed_body);
	
	# If the remote host is a striker, dump its database also.
	if ($this_host_type eq "striker")
	{
		# What's the password and address?
		if (not exists $anvil->data->{database}{$host_uuid})
		{
			# The remote striker isn't known
			print "- The host is a Striker, but we don't have database access info, skipping DB dump.\n";
		}
		else
		{
			print "- Dumping and compressing remote database data, PLEASE BE PATIENT!... ";
			
			# pg_dump runs locally against the remote database, with the password supplied via a
			# temporary '.pgpass' file.
			# NOTE(review): 'overwrite => 0' presumably leaves an existing '.pgpass' untouched, yet
			#               the file is removed below either way - confirm this is intended.
			my $pg_file = "/root/.pgpass";
			my $pg_body = "*:*:*:admin:".$anvil->data->{database}{$host_uuid}{password};
			$anvil->Storage->write_file({
				file      => $pg_file,
				body      => $pg_body,
				mode      => "600",
				overwrite => 0,
				backup    => 0,
			});
			
			my $dump_call = $anvil->data->{path}{exe}{pg_dump}." -h ".$access_ip." -U admin anvil 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $dump_call }});
			my ($dump_output, $dump_return_code) = $anvil->System->call({shell_call => $dump_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				output      => $dump_output,
				return_code => $dump_return_code,
			}});
			
			# Remove the password file before checking the result, so it isn't left behind when we
			# exit on failure.
			unlink $pg_file;
			
			if ($dump_return_code)
			{
				# Failed
				print "Failed!\n";
				print "Expected the return code '0', but got: [".$dump_return_code."]. The error, if any, was:\n";
				print "========\n";
				print $dump_output."\n";
				print "========\n";
				$anvil->nice_exit({exit_code => 1});
			}
			print "Done!\n";
		}
	}
	
	print "- Grabbing hosts file... ";
	_collect_remote_file($anvil, $access_ip, "/etc/hosts", $target_directory, $failed_body);
	
	print "- Grabbing Anvil! log... ";
	_collect_remote_file($anvil, $access_ip, "/var/log/anvil.log", $target_directory, $failed_body);
	
	# If this is a node, grab the CIB. The dump has to be generated on the remote node itself.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > /tmp/cib.xml";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $error, $return_code) = $anvil->Remote->call({
			shell_call => $shell_call,
			target     => $access_ip,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			error       => $error,
			return_code => $return_code,
		}});
		print "Done! Copying to here... ";
		_collect_remote_file($anvil, $access_ip, "/tmp/cib.xml", $target_directory, $failed_body);
	}
	
	# If this is not a striker, collect definition files and the DRBD config. These are directories, so
	# no single-file check is done after the copy.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		$anvil->Storage->rsync({
			source      => "root\@".$access_ip.":/mnt/shared/definitions",
			destination => $target_directory."/",
		});
		print "Done!\n";
		
		print "- Collecting replicated storage config... ";
		$anvil->Storage->rsync({
			source      => "root\@".$access_ip.":/etc/drbd.d",
			destination => $target_directory."/",
		});
		print "Done!\n";
	}
	
	return(0);
}

# Copies a single file from a remote host (as 'root') into '$target_directory' using rsync, then checks that
# the copy actually arrived. On success "Done." is printed. On failure "Failed!" is printed and a placeholder
# file containing '$failed_body' is written where the file should have been.
#
# Parameters;
# - $anvil            - The Anvil::Tools handle.
# - $access_ip        - IP address used to reach the remote host.
# - $remote_file      - Full path to the file on the remote host.
# - $target_directory - Local directory to copy the file into.
# - $failed_body      - Body of the placeholder file written when the copy fails.
#
# Returns '0' if the file exists locally after the copy, '1' otherwise.
sub _collect_remote_file
{
	my ($anvil, $access_ip, $remote_file, $target_directory, $failed_body) = @_;
	
	$anvil->Storage->rsync({
		source      => "root\@".$access_ip.":".$remote_file,
		destination => $target_directory."/",
	});
	
	# The destination is the directory itself, so the file lands directly in it under its own name; the
	# remote directory part of the path is not reproduced locally.
	my ($file_name) = ($remote_file =~ /([^\/]+)$/);
	my $test_file   = $target_directory."/".$file_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }});
	if (-e $test_file)
	{
		print "Done.\n";
		return(0);
	}
	
	print "Failed!\n";
	print "- For some reason, this file was not collected.\n";
	$anvil->Storage->write_file({
		file      => $test_file,
		body      => $failed_body,
		overwrite => 1,
		backup    => 0,
	});
	
	return(1);
}
# Collects logs and configuration from the local host into '$target_directory'.
#
# The previous and current boot journals, '/etc/hosts' and '/var/log/anvil.log' are always collected. The
# 'anvil' database is dumped (bzip2 compressed) if this host is a Striker, the CIB is saved if this host is
# a node, and the server definitions and DRBD config are copied if this host is not a Striker.
#
# Parameters;
# - $anvil            - The Anvil::Tools handle.
# - $target_directory - Directory the collected files are written into.
#
# Returns '0'. Calls 'nice_exit' with exit code '1' if the database dump returns a non-zero return code. The
# return codes of the other commands are logged but not acted on.
sub collect_local_data
{
	my ($anvil, $target_directory) = @_;
	
	my $host_uuid      = $anvil->Get->host_uuid();
	my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		's1:target_directory' => $target_directory,
		's2:host_uuid'        => $host_uuid,
		's3:this_host_type'   => $this_host_type,
	}});
	
	# Dump the previous boot logs to a file.
	print "\nGrabbing logs and data from the local system.\n";
	print "- Grabbing system logs from the previous boot... ";
	my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > ".$target_directory."/journalctl-previous-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		return_code => $return_code,
	}});
	print "Done!\n";
	
	# Dump the current boot logs
	print "- Grabbing system logs from this boot... ";
	$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > ".$target_directory."/journalctl-current-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		return_code => $return_code,
	}});
	print "Done!\n";
	
	# If we're a striker, dump the database also.
	if ($this_host_type eq "striker")
	{
		print "- Dumping and compressing database data, PLEASE BE PATIENT!... ";
		my $dump_call = $anvil->data->{path}{exe}{su}." postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." anvil\" 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $dump_call }});
		my ($dump_output, $dump_return_code) = $anvil->System->call({shell_call => $dump_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $dump_output,
			return_code => $dump_return_code,
		}});
		if ($dump_return_code)
		{
			# Failed
			print "Failed!\n";
			print "Expected the return code '0', but got: [".$dump_return_code."]. The error, if any, was:\n";
			print "========\n";
			print $dump_output."\n";
			print "========\n";
			$anvil->nice_exit({exit_code => 1});
		}
		print "Done!\n";
	}
	
	print "- Grabbing hosts file... ";
	$shell_call = $anvil->data->{path}{exe}{cp}." /etc/hosts ".$target_directory."/";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		return_code => $return_code,
	}});
	print "Done!\n";
	
	print "- Grabbing Anvil! log... ";
	$shell_call = $anvil->data->{path}{exe}{cp}." /var/log/anvil.log ".$target_directory."/";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		return_code => $return_code,
	}});
	print "Done!\n";
	
	# If this is a node, grab the CIB.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > ".$target_directory."/cib.xml";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});
		print "Done!\n";
	}
	
	# If this is not a striker, collect definition files and the DRBD config.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		$shell_call = $anvil->data->{path}{exe}{rsync}." -av /mnt/shared/definitions ".$target_directory."/";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});
		print "Done!\n";
		
		# The remote collection also gathers '/etc/drbd.d', so do the same here to keep the local
		# host's bundle complete.
		print "- Collecting replicated storage config... ";
		$shell_call = $anvil->data->{path}{exe}{rsync}." -av /etc/drbd.d ".$target_directory."/";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});
		print "Done!\n";
	}
	
	return(0);
}
# Turns the '--anvil' and '--hosts' switches into the list of host UUIDs stored in 'collect_from'.
#
# Either switch used without a value (seen here as '#!SET!#') prints the available choices and exits with
# code '0'. A value that can't be resolved prints an error and exits with code '1'. When either switch
# restricted the collection, every Striker is added to the list as well, if not already on it.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns '0'.
sub process_switches
{
	my ($anvil) = @_;
	
	$anvil->data->{collect_from} = [];
	$anvil->Database->get_hosts();
	
	if ($anvil->data->{switches}{anvil})
	{
		if ($anvil->data->{switches}{anvil} eq "#!SET!#")
		{
			# No value given, so list the Anvil! systems the user can choose from.
			print "Available Anvil! systems. Use '--anvil <name or UUID>' to collect data from a specific Anvil! node.\n";
			foreach my $listed_name (sort keys %{$anvil->data->{anvils}{anvil_name}})
			{
				print "- Name: [".$listed_name."], UUID: [".$anvil->data->{anvils}{anvil_name}{$listed_name}{anvil_uuid}."]\n";
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		# Resolve the given string to a known Anvil!.
		my ($anvil_name, $anvil_uuid) = $anvil->Get->anvil_from_switch({
			debug => 2,
			anvil => $anvil->data->{switches}{anvil},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			's1:anvil_name' => $anvil_name,
			's2:anvil_uuid' => $anvil_uuid,
		}});
		unless ($anvil_name)
		{
			# Bad name.
			print "[ Error ] - Unable to get the Anvil! name and UUID from the string: [".$anvil->data->{switches}{anvil}."]\n";
			$anvil->nice_exit({exit_code => 1});
		}
		
		# Both sub-nodes of the Anvil! are collected from.
		push @{$anvil->data->{collect_from}}, (
			$anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid},
			$anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid},
		);
	}
	
	if ($anvil->data->{switches}{hosts})
	{
		if ($anvil->data->{switches}{hosts} eq "#!SET!#")
		{
			# No value given, so list every machine, grouped by type.
			print "Available Anvil! cluster systems. Use '--host <comma-separated list of names or UUIDs>' to collect data from specific hosts.\n";
			my %heading_for = (
				striker => "- Striker Dashboards:\n",
				node    => "\n- Anvil! sub-nodes:\n",
				dr      => "\n- Disaster recovery hosts:\n",
			);
			foreach my $host_type ("striker", "node", "dr")
			{
				print $heading_for{$host_type};
				foreach my $host_name (sort keys %{$anvil->data->{sys}{hosts}{by_name}})
				{
					my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
					my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						's1:host_name'      => $host_name,
						's2:host_uuid'      => $host_uuid,
						's3:this_host_type' => $this_host_type,
					}});
					if ($host_type eq $this_host_type)
					{
						print " - Host: [".$host_name."], UUID: [".$host_uuid."]\n";
					}
				}
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		# Each comma-separated entry has to resolve to a known host.
		my @requested_hosts = split(/,/, $anvil->data->{switches}{hosts});
		foreach my $host (@requested_hosts)
		{
			my ($host_uuid) = $anvil->Database->get_host_uuid_from_string({string => $host});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:host'      => $host,
				's2:host_uuid' => $host_uuid,
			}});
			unless ($host_uuid)
			{
				print "[ Error ] - Unable to get the host UUID from the host string: [".$host."]\n";
				$anvil->nice_exit({exit_code => 1});
			}
			push @{$anvil->data->{collect_from}}, $host_uuid;
		}
	}
	
	# With no restriction in place, the list stays as it is and there is nothing more to do.
	return(0) if ((not $anvil->data->{switches}{anvil}) and (not $anvil->data->{switches}{hosts}));
	
	# We were restricted to an Anvil! or to specific hosts, so make sure the Strikers are on the list.
	STRIKER: foreach my $host_name (sort keys %{$anvil->data->{sys}{hosts}{by_name}})
	{
		my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
		my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			's1:host_name'      => $host_name,
			's2:host_uuid'      => $host_uuid,
			's3:this_host_type' => $this_host_type,
		}});
		next STRIKER if $this_host_type ne "striker";
		
		# Don't add a Striker that's already on the list.
		my $seen = 0;
		LISTED: foreach my $listed_host_uuid (@{$anvil->data->{collect_from}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:this_host_uuid' => $listed_host_uuid,
				's2:host_uuid'      => $host_uuid,
			}});
			next LISTED if $listed_host_uuid ne $host_uuid;
			
			$seen = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { seen => $seen }});
			last LISTED;
		}
		next STRIKER if $seen;
		
		push @{$anvil->data->{collect_from}}, $host_uuid;
	}
	
	return(0);
}

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save