commit
bf288fda49
29 changed files with 3836 additions and 270 deletions
@ -0,0 +1,45 @@ |
|||||||
|
.\" Manpage for the Anvil! power management tool |
||||||
|
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. |
||||||
|
.TH anvil-manage-power "8" "July 11 2023" "Anvil! Intelligent Availability™ Platform" |
||||||
|
.SH NAME |
||||||
|
anvil-manage-power \- This program can power off, reboot, or set a flag indicating one of these actions is required. |
||||||
|
.SH SYNOPSIS |
||||||
|
.B anvil-manage-power |
||||||
|
\fI\,<command> \/\fR[\fI\,options\/\fR] |
||||||
|
.SH DESCRIPTION |
||||||
|
This program can mark a machine as needing to be powered off or rebooted, or perform those actions directly or as a job. |
||||||
|
.TP |
||||||
|
\-?, \-h, \fB\-\-help\fR |
||||||
|
Show this man page. |
||||||
|
.TP |
||||||
|
\fB\-\-log-secure\fR |
||||||
|
When logging, record sensitive data, like passwords. |
||||||
|
.TP |
||||||
|
\-v, \-vv, \-vvv |
||||||
|
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. |
||||||
|
.SS "Commands:" |
||||||
|
.TP |
||||||
|
\fB\-\-no-wait\fR |
||||||
|
.TP |
||||||
|
Normally, this program will not reboot a machine until the uptime is over five minutes. This is done to provide a chance for someone to log in and disable anvil-daemon in the case of a reboot loop. This switch prevents waiting for that 5 minute delay. |
||||||
|
.TP |
||||||
|
\fB\-\-poweroff\fR, \fB\-\-power\-off\fR |
||||||
|
.TP |
||||||
|
This powers off the host. |
||||||
|
.TP |
||||||
|
\fB\-\-reboot\fR |
||||||
|
.TP |
||||||
|
This reboots the host. |
||||||
|
.TP |
||||||
|
\fB\-\-reboot\-needed\fR [0,1] |
||||||
|
.TP |
||||||
|
This sets (1) or clears (0) the 'reboot needed' flag for the host system. |
||||||
|
.TP |
||||||
|
\fB\-y\fR, \fB\-\-yes\fR |
||||||
|
.TP |
||||||
|
If passed, requests to reboot or power off won't ask for confirmation. |
||||||
|
.IP |
||||||
|
.SH AUTHOR |
||||||
|
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. |
||||||
|
.SH "REPORTING BUGS" |
||||||
|
Report bugs to users@clusterlabs.org |
@ -0,0 +1,32 @@ |
|||||||
|
.\" Manpage for the Anvil! special operations tool |
||||||
|
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. |
||||||
|
.TH anvil-special-operations "8" "Jun 30 2023" "Anvil! Intelligent Availability™ Platform" |
||||||
|
.SH NAME |
||||||
|
anvil-special-operations \- This program is generally meant to be used by other programs. |
||||||
|
.SH SYNOPSIS |
||||||
|
.B anvil-special-operations |
||||||
|
\fI\,<command> \/\fR[\fI\,options\/\fR] |
||||||
|
.SH DESCRIPTION |
||||||
|
This tool is used, generally by other parts of the Anvil!, to accomplish tasks that generally can't be accomplished by direct system calls. It's a general purpose tool meant to solve specific corner cases. |
||||||
|
.TP |
||||||
|
.SH OPTIONS |
||||||
|
.TP |
||||||
|
\-?, \-h, \fB\-\-help\fR |
||||||
|
Show this man page. |
||||||
|
.TP |
||||||
|
\fB\-\-log-secure\fR |
||||||
|
When logging, record sensitive data, like passwords. |
||||||
|
.TP |
||||||
|
\-v, \-vv, \-vvv |
||||||
|
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. |
||||||
|
.SS "Commands:" |
||||||
|
.TP |
||||||
|
\fB\-\-task\fR |
||||||
|
This is the task being requested. Current options are: |
||||||
|
.IP refresh-drbd-resource |
||||||
|
This requires \fB\-\-resource <resource name>\fR, and will call 'drbdadm adjust <resource>' as a background task and then return immediately. This is required when adding a new volume to an existing resource, as 'drbdadm adjust <res>' will hold until it is called on all active DRBD nodes. Without backgrounding, this would block the caller after the first remote host call. |
||||||
|
.IP |
||||||
|
.SH AUTHOR |
||||||
|
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. |
||||||
|
.SH "REPORTING BUGS" |
||||||
|
Report bugs to users@clusterlabs.org |
@ -0,0 +1,39 @@ |
|||||||
|
.\" Manpage for the Anvil! cluster update tool. |
||||||
|
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. |
||||||
|
.TH anvil-update-system "8" "July 14 2023" "Anvil! Intelligent Availability™ Platform" |
||||||
|
.SH NAME |
||||||
|
anvil-update-system \- This program updates the local operating system |
||||||
|
.SH SYNOPSIS |
||||||
|
.B anvil-update-system |
||||||
|
\fI\,<command> \/\fR[\fI\,options\/\fR] |
||||||
|
.SH DESCRIPTION |
||||||
|
This program updates the local operating system. If the kernel is updated, a reboot will be performed. |
||||||
|
.TP |
||||||
|
.B Note: |
||||||
|
.TP |
||||||
|
If the host is an Anvil! subnode, the subnode will be removed from the Anvil! node (and servers migrated off, or, shut down if the peer subnode is offline). |
||||||
|
.TP |
||||||
|
.SH OPTIONS |
||||||
|
.TP |
||||||
|
\-?, \-h, \fB\-\-help\fR |
||||||
|
Show this man page. |
||||||
|
.TP |
||||||
|
\fB\-\-log-secure\fR |
||||||
|
When logging, record sensitive data, like passwords. |
||||||
|
.TP |
||||||
|
\-v, \-vv, \-vvv |
||||||
|
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. |
||||||
|
.SS "Commands:" |
||||||
|
.TP |
||||||
|
\fB\-\-clear\-cache\fR |
||||||
|
.TP |
||||||
|
This will force the dnf cache to be cleared before the OS update is started. This slows the update down a bit, but ensures the latest updates are installed. |
||||||
|
.TP |
||||||
|
\fB\-\-no\-reboot\fR |
||||||
|
.TP |
||||||
|
If the kernel is updated, the system will normally be rebooted. This switch prevents the reboot from occurring. |
||||||
|
.IP |
||||||
|
.SH AUTHOR |
||||||
|
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. |
||||||
|
.SH "REPORTING BUGS" |
||||||
|
Report bugs to users@clusterlabs.org |
@ -0,0 +1,45 @@ |
|||||||
|
.\" Manpage for the Anvil! debug data collection tool. |
||||||
|
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. |
||||||
|
.TH striker-collect-data "8" "July 04 2023" "Anvil! Intelligent Availability™ Platform" |
||||||
|
.SH NAME |
||||||
|
striker-collect-data \- This program collects data needed to help diagnose problems with an Anvil! system. |
||||||
|
.SH SYNOPSIS |
||||||
|
.B striker-collect-data |
||||||
|
\fI\,<command> \/\fR[\fI\,options\/\fR] |
||||||
|
.SH DESCRIPTION |
||||||
|
This program collects database data, logs, config files and other information needed to help diagnose problems with the Anvil! platform. By default, this collects all data from all accessible machines. |
||||||
|
.TP |
||||||
|
.B Note: |
||||||
|
.TP |
||||||
|
This program collects potentially secure information, like passwords. Be careful who you share the collected data with! |
||||||
|
.TP |
||||||
|
The data from Striker dashboards are always collected. |
||||||
|
.TP |
||||||
|
.SH OPTIONS |
||||||
|
.TP |
||||||
|
\-?, \-h, \fB\-\-help\fR |
||||||
|
Show this man page. |
||||||
|
.TP |
||||||
|
\fB\-\-log-secure\fR |
||||||
|
When logging, record sensitive data, like passwords. |
||||||
|
.TP |
||||||
|
\-v, \-vv, \-vvv |
||||||
|
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. |
||||||
|
.SS "Commands:" |
||||||
|
.TP |
||||||
|
\fB\-\-anvil\fR <name or UUID> |
||||||
|
.TP |
||||||
|
This restricts the data to be collected to the Striker dashboards and the specific Anvil! node pair. |
||||||
|
.TP |
||||||
|
\fB\-\-hosts\fR <comma-separated list of host names or UUIDs> |
||||||
|
.TP |
||||||
|
This can be used to specify which specific hosts data is collected from. Note that this can be used in conjunction with \fB\-\-anvil\fR to add additional hosts to collect data from, like DR hosts. |
||||||
|
.TP |
||||||
|
\fB\-\-output\-file\fR </path/to/file.tar.bz2> |
||||||
|
.TP |
||||||
|
This allows you to specify the output compressed tarball that the files will be saved in. By default, the output file is \fB/root/anvil-debug_<timestamp>.tar.bz2\fR. If this is a directory (ending in \fB/\fR), the normal file name is created, just in a different directory. If the path ends in a file that doesn't have the \fB.tar.bz2\fR suffix, that suffix will be added automatically. The output file will always be a bzip2-compressed tarball. |
||||||
|
.IP |
||||||
|
.SH AUTHOR |
||||||
|
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. |
||||||
|
.SH "REPORTING BUGS" |
||||||
|
Report bugs to users@clusterlabs.org |
@ -0,0 +1,53 @@ |
|||||||
|
.\" Manpage for the Anvil! cluster update tool. |
||||||
|
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. |
||||||
|
.TH striker-update-cluster "8" "July 11 2023" "Anvil! Intelligent Availability™ Platform" |
||||||
|
.SH NAME |
||||||
|
striker-update-cluster \- This program updates all physical machines in an Anvil! cluster |
||||||
|
.SH SYNOPSIS |
||||||
|
.B striker-update-cluster |
||||||
|
\fI\,<command> \/\fR[\fI\,options\/\fR] |
||||||
|
.SH DESCRIPTION |
||||||
|
This program sequentially updates Striker dashboards, DR hosts and Anvil! nodes (the paired sub-nodes). It does this without needing to take hosted servers offline. |
||||||
|
.TP |
||||||
|
.B Note: |
||||||
|
.TP |
||||||
|
This program requires that all machines be online, and that Anvil! nodes be paired and sync'ed. When nodes are updated, the inactive subnode will be removed from the node, updated, rebooted if necessary, and then rejoined to the node. Then hosted servers will migrate to the now-updated subnode, and the process is repeated for the other subnode. Anvil! nodes are updated sequentially, so the process can take some time to complete, but should not require a maintenance window. |
||||||
|
.TP |
||||||
|
The upgrade process will live-migrate all hosted servers! If any hosted server is either under heavy load, or the replication link (the BCN or MN) is relatively lower bandwidth, this could cause performance concerns. As such, it's ideal to run the upgrades at a time less sensitive to performance impacts. |
||||||
|
.TP |
||||||
|
.SH OPTIONS |
||||||
|
.TP |
||||||
|
\-?, \-h, \fB\-\-help\fR |
||||||
|
Show this man page. |
||||||
|
.TP |
||||||
|
\fB\-\-log-secure\fR |
||||||
|
When logging, record sensitive data, like passwords. |
||||||
|
.TP |
||||||
|
\-v, \-vv, \-vvv |
||||||
|
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. |
||||||
|
.SS "Commands:" |
||||||
|
.TP |
||||||
|
\fB\-\-clear\-cache\fR |
||||||
|
.TP |
||||||
|
This will force the dnf cache to be cleared before the OS update is started. This slows the update down a bit, but ensures the latest updates are installed. |
||||||
|
.TP |
||||||
|
\fB\-\-force\fR |
||||||
|
.TP |
||||||
|
If any Striker dashboards or DR hosts are unavailable, or if an entire node (paired subnodes) is offline, this switch will allow you to force the upgrade attempt. |
||||||
|
.TP |
||||||
|
\fB\-y\fR, \fB\-\-yes\fR |
||||||
|
.TP |
||||||
|
Automatically continue with the upgrade without prompting for confirmation. |
||||||
|
.TP |
||||||
|
\fB\-\-no\-reboot\fR |
||||||
|
.TP |
||||||
|
If the kernel is updated on a remote system, the system will normally be rebooted. This switch prevents the reboot from occurring. |
||||||
|
.TP |
||||||
|
\fB\-\-reboot\-self\fR |
||||||
|
.TP |
||||||
|
By default, if the local system needs to be updated, a message is printed but the local system is NOT rebooted. This switch will instead cause this host to reboot at the end of the cluster update. |
||||||
|
.IP |
||||||
|
.SH AUTHOR |
||||||
|
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. |
||||||
|
.SH "REPORTING BUGS" |
||||||
|
Report bugs to users@clusterlabs.org |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,120 @@ |
|||||||
|
#!/usr/bin/perl |
||||||
|
# |
||||||
|
# This program has no specific purpose. It's a general program for performing certain special tasks that |
||||||
|
# can't be done otherwise in a reliable or efficient way. |
||||||
|
# |
||||||
|
# Exit codes; |
||||||
|
# 0 = Normal exit. |
||||||
|
# 1 = No database connection. |
||||||
|
|
||||||
|
|
||||||
|
use strict; |
||||||
|
use warnings; |
||||||
|
use Anvil::Tools; |
||||||
|
require POSIX; |
||||||
|
use Text::Diff; |
||||||
|
use Data::Dumper; |
||||||
|
|
||||||
|
# Derive this program's file name, and the directory it was run from, out of '$0'.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Run via a relative path; replace the leading '.' with the current working directory.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off output buffering so that anything printed is shown immediately.
$| = 1;

my $anvil = Anvil::Tools->new();

# Read the switches; '--task' selects the operation to perform and '--resource' is the resource that the
# 'refresh-drbd-resource' task acts on.
$anvil->Get->switches({
	list => ["task", "resource"],
	man  => $THIS_FILE,
});
$anvil->Log->variables({
	source => $THIS_FILE,
	line   => __LINE__,
	level  => 2,
	list   => $anvil->data->{switches},
});
$anvil->Log->entry({
	source    => $THIS_FILE,
	line      => __LINE__,
	level     => 2,
	key       => "log_0115",
	variables => { program => $THIS_FILE },
});

# A database connection is required; without one we log the error, pause briefly and exit with code 1.
$anvil->Database->connect();
$anvil->Log->entry({
	source => $THIS_FILE,
	line   => __LINE__,
	level  => 3,
	secure => 0,
	key    => "log_0132",
});
unless ($anvil->data->{sys}{database}{connections})
{
	$anvil->Log->entry({
		source   => $THIS_FILE,
		line     => __LINE__,
		'print'  => 1,
		level    => 0,
		priority => "err",
		key      => "error_0306",
	});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# If we were started as a job, load the job's details and record that this process picked it up.
if ($anvil->data->{switches}{'job-uuid'})
{
	$anvil->Job->clear();
	$anvil->Job->get_job_details({debug => 2});
	$anvil->Job->update_progress({
		progress         => 1,
		job_picked_up_by => $$,
		job_picked_up_at => time,
		message          => "message_0311",
	});
}

# Dispatch on the requested task. Any other task value falls through to a normal exit.
my $task = $anvil->data->{switches}{task};
refresh_drbd_resource($anvil) if $task eq "refresh-drbd-resource";

$anvil->nice_exit({exit_code => 0});
||||||
|
|
||||||
|
|
||||||
|
############################################################################################################# |
||||||
|
# Functions # |
||||||
|
############################################################################################################# |
||||||
|
|
||||||
|
# Calls 'drbdadm adjust <resource>' as a background call, using the resource named by '--resource'. The call
# is backgrounded because, when adding a new volume to an existing resource, 'drbdadm adjust <res>' won't
# return until it has been run on all hosts, which would otherwise hold up a caller on a remote host.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# If '--resource' was not given, the job is marked as failed and the program exits with code 1. Otherwise,
# the program exits with code 0 once the background call has been made.
sub refresh_drbd_resource
{
	my ($anvil) = @_;
	
	my $drbd_resource = $anvil->data->{switches}{resource};
	$anvil->Log->variables({
		source => $THIS_FILE,
		line   => __LINE__,
		level  => 2,
		list   => { resource => $drbd_resource },
	});
	
	# Without a resource there is nothing to adjust; record the failure and exit.
	unless ($drbd_resource)
	{
		$anvil->Job->update_progress({
			progress   => 100,
			message    => "error_0419",
			job_status => "failed",
		});
		$anvil->Log->entry({
			source   => $THIS_FILE,
			line     => __LINE__,
			'print'  => 1,
			level    => 0,
			priority => 'err',
			key      => "error_0419",
		});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Report that the adjust is about to be called.
	my $starting_message = "message_0312,!!resource!".$drbd_resource."!!";
	$anvil->Job->update_progress({
		progress => 10,
		message  => $starting_message,
	});
	$anvil->Log->entry({
		source    => $THIS_FILE,
		line      => __LINE__,
		'print'   => 1,
		level     => 2,
		key       => "message_0312",
		variables => { resource => $drbd_resource },
	});
	
	# NOTE(review): The resource name comes straight from the '--resource' switch and is placed in the
	#               shell call unquoted - confirm that callers only ever pass valid resource names.
	my $adjust_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource;
	$anvil->Log->variables({
		source => $THIS_FILE,
		line   => __LINE__,
		level  => 2,
		list   => { shell_call => $adjust_call },
	});
	
	# Run the call in the background so that we don't wait for it to finish.
	my ($output, $return_code) = $anvil->System->call({
		shell_call => $adjust_call,
		background => 1,
	});
	$anvil->Log->variables({
		source => $THIS_FILE,
		line   => __LINE__,
		level  => 2,
		list   => {
			output      => $output,
			return_code => $return_code,
		},
	});
	
	# Report that the call has been made, then exit.
	my $finished_message = "message_0313,!!resource!".$drbd_resource."!!";
	$anvil->Job->update_progress({
		progress => 100,
		message  => $finished_message,
	});
	$anvil->Log->entry({
		source    => $THIS_FILE,
		line      => __LINE__,
		'print'   => 1,
		level     => 2,
		key       => "message_0313",
		variables => { resource => $drbd_resource },
	});
	
	$anvil->nice_exit({exit_code => 0});
	
	return(0);
}
@ -0,0 +1,797 @@ |
|||||||
|
#!/usr/bin/perl |
||||||
|
# |
||||||
|
# This program will collect data from all accessible machines and compile it into a common tarball. This is |
||||||
|
# designed to make it easier to diagnose faults. |
||||||
|
# |
||||||
|
# Exit codes; |
||||||
|
# 0 = Normal exit. |
||||||
|
# 1 = No database connection. |
||||||
|
# |
||||||
|
# TODO: |
||||||
|
# |
||||||
|
# USAGE: |
||||||
|
# |
||||||
|
|
||||||
|
use strict; |
||||||
|
use warnings; |
||||||
|
use Anvil::Tools; |
||||||
|
require POSIX; |
||||||
|
use Term::Cap; |
||||||
|
use Text::Diff; |
||||||
|
use Data::Dumper; |
||||||
|
|
||||||
|
# Derive this program's file name, and the directory it was run from, out of '$0'.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Run via a relative path; replace the leading '.' with the current working directory.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that progress messages printed without a trailing newline are shown immediately.
$| = 1;

my $anvil = Anvil::Tools->new();

# Read switches; '--anvil' and '--hosts' restrict where data is collected from, and '--output-file' sets
# where the tarball is saved.
$anvil->Get->switches({list => [
	"anvil", 
	"hosts", 
	"output-file"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Connect to the database(s). If there are no connections, log the error, wait a bit and exit.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0305"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're a striker.
if ($anvil->Get->host_type ne "striker")
{
	print "This has to be run on a Striker dashboard.\n";
	$anvil->nice_exit({exit_code => 1});
}

# Record the time stamp used in file names, and the directory that the collected files are compiled in. The
# directory itself is created later, in collect_data().
$anvil->data->{sys}{date_and_time}     = $anvil->Get->date_and_time({file_name => 1});
$anvil->data->{sys}{compile_directory} = "/tmp/anvil-debug_".$anvil->data->{sys}{date_and_time};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	"sys::date_and_time"     => $anvil->data->{sys}{date_and_time},
	"sys::compile_directory" => $anvil->data->{sys}{compile_directory}, 
}});

print "Data collection has begun.\n";
print "Depending on how many systems we're collecting from, this could take a while.\n";

# Work out the full path to the output tarball, and make sure the directory it will be saved in exists.
my $tarball = process_output($anvil);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});

process_switches($anvil);

collect_data($anvil);

# Create the tarball now.
print "Data collection complete, creating the tarball now... ";
my $shell_call = $anvil->data->{path}{exe}{tar}." -cvjf ".$tarball." ".$anvil->data->{sys}{compile_directory};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	's1:tarball'    => $tarball,
	's2:shell_call' => $shell_call, 
}});

my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	output      => $output,
	return_code => $return_code, 
}});
if ($return_code)
{
	# A non-zero return code means 'tar' reported a problem, so don't tell the user that the tarball is
	# ready.
	print "Failed!\n";
	print "[ Error ] - Failed to create the tarball: [".$tarball."], 'tar' exited with the return code: [".$return_code."].\n";
	$anvil->nice_exit({exit_code => 1});
}
print "Done!\n";

print "\n[ Complete ] - The debug data is here: [".$tarball."]\n";
print "[ Warning ] - The collected logs likely include sensitive information! Share it carefully!\n";

$anvil->nice_exit({exit_code => 0});
||||||
|
|
||||||
|
|
||||||
|
############################################################################################################# |
||||||
|
# Functions # |
||||||
|
############################################################################################################# |
||||||
|
|
||||||
|
# Works out the full path of the output tarball and makes sure the directory it will be written to exists.
#
# With no '--output-file' switch, the tarball is '/root/anvil-debug_<date_and_time>.tar.bz2'. If the switch
# is set, it must be an absolute path. If the path ends in '/', the default file name is used in that
# directory. If the file name doesn't end in '.tar.bz2', that suffix is appended.
#
# Parameters;
# - $anvil: The Anvil::Tools object. 'sys::date_and_time' must already be set.
#
# Returns the full path to the tarball. Exits with code 1 if the path is not absolute, or if the output
# directory can't be created.
sub process_output
{
	my ($anvil) = @_;
	
	my $default_file = "anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
	my $tarball      = "/root/".$default_file;
	if ($anvil->data->{switches}{'output-file'})
	{
		my $new_directory = $anvil->data->{switches}{'output-file'};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_directory => $new_directory }});
		if ($new_directory !~ /^\//)
		{
			print "[ Error ] - The output path needs to be an absolute path (starting with '/').\n";
			$anvil->nice_exit({exit_code => 1});
		}
		
		# Use the requested path. The file name and suffix are sorted out below.
		$tarball = $new_directory;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});
	}
	
	# Split the path at the last '/'. This is done in one match, rather than interpolating the file name
	# into a second regular expression, so that file names containing regular expression metacharacters
	# are handled properly. The path always starts with '/', so this always matches.
	my ($output_directory, $output_file) = ($tarball =~ /^(.*)\/([^\/]*)$/);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output_file      => $output_file, 
		output_directory => $output_directory,
	}});
	
	if (not $output_file)
	{
		# The path ended in '/', so it's a directory. Use the default file name.
		$output_file = $default_file;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output_file => $output_file }});
	}
	elsif ($output_file !~ /\.tar\.bz2$/)
	{
		# The file name doesn't end in the suffix, so append it.
		$output_file .= ".tar.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output_file => $output_file }});
	}
	
	# An empty directory means the file goes directly under '/', which needs no directory to be created.
	if (($output_directory ne "") && ($output_directory ne "/") && (not -d $output_directory))
	{
		print "- Creating the output directory: [".$output_directory."]... ";
		my $failed = $anvil->Storage->make_directory({directory => $output_directory});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
		if ($failed)
		{
			print "Failed!\nUnable to create the directory: [".$output_directory."]. The error should be logged.\n";
			$anvil->nice_exit({exit_code => 1});
		}
		print "Done.\n";
	}
	
	$tarball = $output_directory."/".$output_file;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});
	
	return($tarball);
}
||||||
|
|
||||||
|
# Creates the compile directory, then walks the known hosts and collects data from each into a per-host
# sub-directory named after the host's short host name.
#
# Hosts are processed by type; Striker dashboards, then nodes, then DR hosts, and by host name within each
# type. If 'collect_from' holds any host UUIDs, only those hosts are collected from. The local host is
# handled by collect_local_data() and all others by collect_remote_data(). If a remote collection reports
# a problem, a 'no_access.txt' file is written to that host's directory instead.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'. Exits with code 1 if a directory can't be created.
sub collect_data
{
	my ($anvil) = @_;
	
	my $failed = $anvil->Storage->make_directory({directory => $anvil->data->{sys}{compile_directory}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
	if ($failed)
	{
		print "Failed to create the directory: [".$anvil->data->{sys}{compile_directory}."]. The error should be logged.\n";
		$anvil->nice_exit({exit_code => 1});
	}
	
	# If 'collect_from' was never set, treat it as an empty list (collect from all hosts). Dereferencing
	# an undefined value here would be fatal under 'use strict'.
	my $collect_from = ref($anvil->data->{collect_from}) eq "ARRAY" ? $anvil->data->{collect_from} : [];
	my $hosts        = @{$collect_from};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts => $hosts }});
	foreach my $host_type ("striker", "node", "dr")
	{
		foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid       = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
			my $this_host_type  = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'       => $host_name, 
				's2:host_uuid'       => $host_uuid, 
				's3:short_host_name' => $short_host_name, 
				's4:this_host_type'  => $this_host_type, 
			}});
			next if $host_type ne $this_host_type;
			
			# Are we collecting from a subset only?
			if ($hosts)
			{
				# Yes, is this host one of them?
				my $found = 0;
				foreach my $this_host_uuid (@{$collect_from})
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						host_uuid      => $host_uuid, 
						this_host_uuid => $this_host_uuid,
					}});
					if ($this_host_uuid eq $host_uuid)
					{
						$found = 1;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { found => $found }});
						last;
					}
				}
				next if not $found;
			}
			
			# Make sure there's a directory for this host.
			# NOTE(review): Mode '777' is used for this directory although the collected data can
			#               include sensitive information - confirm that this is intended.
			my $target_directory = $anvil->data->{sys}{compile_directory}."/".$short_host_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target_directory => $target_directory }});
			if (not -d $target_directory)
			{
				my $failed = $anvil->Storage->make_directory({
					directory => $target_directory,
					mode      => "777",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
				if ($failed)
				{
					print "Failed to create the directory: [".$target_directory."]. The error should be logged.\n";
					$anvil->nice_exit({exit_code => 1});
				}
			}
			
			# Is this the local host or a remote one?
			if ($host_uuid eq $anvil->Get->host_uuid)
			{
				### Collecting local data.
				collect_local_data($anvil, $target_directory);
			}
			else
			{
				# Collecting data from a remote machine
				my $problem = collect_remote_data($anvil, $host_uuid, $target_directory);
				if ($problem)
				{
					# Create a file saying we couldn't access this machine.
					my $body = "No access to: [".$host_name."] found.\n";
					my $file = $target_directory."/no_access.txt";
					$anvil->Storage->write_file({
						file      => $file, 
						body      => $body, 
						overwrite => 1,
						backup    => 0,
					});
				}
			}
		}
	}
	
	return(0);
}
||||||
|
|
||||||
|
sub collect_remote_data |
||||||
|
{ |
||||||
|
my ($anvil, $host_uuid, $target_directory) = @_; |
||||||
|
|
||||||
|
my $host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name}; |
||||||
|
my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name}; |
||||||
|
my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; |
||||||
|
my $failed_body = "File not copied from: [".$host_name."].\n"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:host_name' => $host_name, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
's3:short_host_name' => $short_host_name, |
||||||
|
's4:this_host_type' => $this_host_type, |
||||||
|
's5:target_directory' => $target_directory, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Dump the previous boot logs to a file. |
||||||
|
print "\nGrabbing logs and data from the remote system: [".$short_host_name."].\n"; |
||||||
|
print "- Testing access...\n"; |
||||||
|
my $matches = $anvil->Network->find_access({ |
||||||
|
debug => 2, |
||||||
|
target => $host_name, |
||||||
|
}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }}); |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{ip} = ""; |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{network} = ""; |
||||||
|
foreach my $preferred_network ("bcn", "mn", "ifn", "sn") |
||||||
|
{ |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }}); |
||||||
|
foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}}) |
||||||
|
{ |
||||||
|
next if $network_name !~ /^$preferred_network/; |
||||||
|
my $target_ip = $anvil->data->{network_access}{$network_name}{target_ip_address}; |
||||||
|
my $test_access = $anvil->Remote->test_access({target => $target_ip}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:network_name' => $network_name, |
||||||
|
's2:target_ip' => $target_ip, |
||||||
|
's3:test_access' => $test_access, |
||||||
|
}}); |
||||||
|
|
||||||
|
if ($test_access) |
||||||
|
{ |
||||||
|
# We're good. |
||||||
|
print "- Found access over the network: [".$network_name."] using the target IP: [".$target_ip."]\n"; |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{ip} = $target_ip; |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
"s1:peer::${short_host_name}::access::ip" => $anvil->data->{peer}{$short_host_name}{access}{ip}, |
||||||
|
"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network}, |
||||||
|
}}); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (not $anvil->data->{peer}{$short_host_name}{access}{ip}) |
||||||
|
{ |
||||||
|
print "No access!!\n"; |
||||||
|
print "- Not able to collect data from this host, skipping.\n"; |
||||||
|
return(1); |
||||||
|
} |
||||||
|
|
||||||
|
print "- Writing out system logs from the previous boot... "; |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > /tmp/journalctl-previous-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
my ($output, $error, $return_code) = $anvil->Remote->call({ |
||||||
|
shell_call => $shell_call, |
||||||
|
target => $anvil->data->{peer}{$short_host_name}{access}{ip}, |
||||||
|
}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
error => $error, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Copying the file |
||||||
|
print "Done! Copying to here... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/tmp/journalctl-previous-boot.log", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
my $test_file = $target_directory."/tmp/journalctl-previous-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
# Dump the current boot logs |
||||||
|
print "- Grabbing system logs from this boot... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Copying the file |
||||||
|
print "Done! Copying to here... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/tmp/journalctl-current-boot.log", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
$test_file = $target_directory."/journalctl-current-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
# If we're a striker, dump the database also. |
||||||
|
if ($this_host_type eq "striker") |
||||||
|
{ |
||||||
|
# What's the password and address? |
||||||
|
if (not exists $anvil->data->{database}{$host_uuid}) |
||||||
|
{ |
||||||
|
# The remote striker isn't known |
||||||
|
print "- The host is a Striker, but we don't have database access info, skipping DB dump.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "- Dumping and compressing remote database data, PLEASE BE PATIENT!... "; |
||||||
|
my $pg_file = "/root/.pgpass"; |
||||||
|
my $pg_body = "*:*:*:admin:".$anvil->data->{database}{$host_uuid}{password}; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $pg_file, |
||||||
|
body => $pg_body, |
||||||
|
mode => "600", |
||||||
|
overwrite => 0, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{pg_dump}." -h ".$anvil->data->{peer}{$short_host_name}{access}{ip}." -U admin anvil 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
if ($return_code) |
||||||
|
{ |
||||||
|
# Failed |
||||||
|
print "Failed!\n"; |
||||||
|
print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n"; |
||||||
|
print "========\n"; |
||||||
|
print $output."\n"; |
||||||
|
print "========\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
unlink $pg_file; |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
print "- Grabbing hosts file... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/etc/hosts", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
$test_file = $target_directory."/hosts"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
print "- Grabbing Anvil! log... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/var/log/anvil.log", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
$test_file = $target_directory."/anvil.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
# If this is a node, grab the shared files. |
||||||
|
if ($this_host_type eq "node") |
||||||
|
{ |
||||||
|
print "- Collecting the cluster information base (CIB)... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > /tmp/cib.xml"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Copying the file |
||||||
|
print "Done! Copying to here... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/tmp/cib.xml", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
my $test_file = $target_directory."/cib.xml"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
# If this is not a striker, collect definition files. |
||||||
|
if ($this_host_type ne "striker") |
||||||
|
{ |
||||||
|
print "- Collecting server definitions... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/mnt/shared/definitions", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
print "- Collecting replicated storage config... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/etc/drbd.d", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
|
||||||
|
return(0); |
||||||
|
} |
||||||
|
|
||||||
|
# Gathers logs and configuration from the local machine into the target directory.
#
# Parameters (positional):
#   $anvil            - The tools handle used for data lookups, logging and system calls.
#   $target_directory - Directory that the collected files are written into.
#
# What is collected, in order:
#   - 'journalctl' output for the previous boot and for the current boot.
#   - On a 'striker' host, a bzip2-compressed 'pg_dump' of the 'anvil' database. If that call gives
#     a non-zero return code, the output is printed and nice_exit() is called with exit code 1.
#   - /etc/hosts and /var/log/anvil.log.
#   - On a 'node' host, the output of 'pcs cluster cib'.
#   - On any host that is not a 'striker', the /mnt/shared/definitions directory.
#
# Apart from the database dump, return codes are logged but not acted on.
#
# Returns 0.
sub collect_local_data
{
	my ($anvil, $target_directory) = @_;

	my $host_uuid      = $anvil->Get->host_uuid();
	my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		's1:target_directory' => $target_directory,
		's2:host_uuid'        => $host_uuid,
		's3:this_host_type'   => $this_host_type,
	}});

	# Shortcut to the table of executable paths used to build every command below.
	my $exe = $anvil->data->{path}{exe};

	# Announces a step, runs its shell call on this machine and logs both the call and the result.
	# The caller decides what to print afterwards, so the output and return code are handed back.
	my $run_step = sub
	{
		my ($banner, $shell_call) = @_;

		print $banner;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});

		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});

		return($output, $return_code);
	};

	print "\nGrabbing logs and data from the local system.\n";

	# System journal from the boot before this one.
	$run_step->(
		"- Grabbing system logs from the previous boot... ",
		$exe->{journalctl}." -b -1 > ".$target_directory."/journalctl-previous-boot.log",
	);
	print "Done!\n";

	# System journal from the running boot.
	$run_step->(
		"- Grabbing system logs from this boot... ",
		$exe->{journalctl}." -b 0 > ".$target_directory."/journalctl-current-boot.log",
	);
	print "Done!\n";

	# Strikers host the database, so dump it as well. This is the only step that aborts on failure.
	if ($this_host_type eq "striker")
	{
		my ($dump_output, $dump_return_code) = $run_step->(
			"- Dumping and compressing database data, PLEASE BE PATIENT!... ",
			$exe->{su}." postgres -c \"".$exe->{pg_dump}." anvil\" 2>/dev/null | ".$exe->{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2",
		);
		if ($dump_return_code)
		{
			# Failed
			print "Failed!\n";
			print "Expected the return code '0', but got: [".$dump_return_code."]. The error, if any, was:\n";
			print "========\n";
			print $dump_output."\n";
			print "========\n";
			$anvil->nice_exit({exit_code => 1});
		}
		print "Done!\n";
	}

	# Plain file copies.
	$run_step->(
		"- Grabbing hosts file... ",
		$exe->{cp}." /etc/hosts ".$target_directory."/",
	);
	print "Done!\n";

	$run_step->(
		"- Grabbing Anvil! log... ",
		$exe->{cp}." /var/log/anvil.log ".$target_directory."/",
	);
	print "Done!\n";

	# Nodes also provide the cluster information base.
	if ($this_host_type eq "node")
	{
		$run_step->(
			"- Collecting the cluster information base (CIB)... ",
			$exe->{pcs}." cluster cib > ".$target_directory."/cib.xml",
		);
		print "Done!\n";
	}

	# Everything except a Striker provides the server definition files.
	if ($this_host_type ne "striker")
	{
		$run_step->(
			"- Collecting server definitions... ",
			$exe->{rsync}." -av /mnt/shared/definitions ".$target_directory."/",
		);
		print "Done!\n";
	}

	return(0);
}
||||||
|
|
||||||
|
# Works out which hosts data should be collected from, based on the 'anvil' and 'hosts' switches,
# and stores their host UUIDs (in the order found, without duplicates) in the array reference at
# $anvil->data->{collect_from}.
#
# Parameters (positional):
#   $anvil - The tools handle used for data lookups, logging and exiting.
#
# Behaviour:
#   - 'anvil' switch set without a value ("#!SET!#"): print the known Anvil! systems, exit with 0.
#   - 'anvil' switch set to a name or UUID: queue both node host UUIDs recorded for that Anvil!. If
#     the string can't be resolved, print an error and exit with 1.
#   - 'hosts' switch set without a value: print all hosts grouped by type, exit with 0.
#   - 'hosts' switch set to a comma-separated list: queue each host. Whitespace around entries is
#     ignored and empty entries are skipped. An entry that can't be resolved to a host UUID prints
#     an error and exits with 1.
#   - If either switch restricted the selection, every 'striker' host is queued as well.
#
# If neither switch is set, the list is left empty.
#
# Returns 0.
sub process_switches
{
	my ($anvil) = @_;

	$anvil->data->{collect_from} = [];
	$anvil->Database->get_hosts();

	# Host UUIDs already queued. The same host can be selected more than once (both switches used
	# together, or a repeated entry in the host list) and must only be collected from once.
	my %queued;
	my $queue_host = sub
	{
		my ($host_uuid) = @_;

		# Nothing usable to queue (for example, a node UUID missing from the Anvil! record).
		return(0) if ((not defined $host_uuid) or ($host_uuid eq ""));
		return(0) if $queued{$host_uuid};

		$queued{$host_uuid} = 1;
		push @{$anvil->data->{collect_from}}, $host_uuid;
		return(1);
	};

	if ($anvil->data->{switches}{anvil})
	{
		if ($anvil->data->{switches}{anvil} eq "#!SET!#")
		{
			# Show a list of Anvil! systems.
			print "Available Anvil! systems. Use '--anvil <name or UUID>' to collect data from a specific Anvil! node.\n";
			foreach my $anvil_name (sort {$a cmp $b} keys %{$anvil->data->{anvils}{anvil_name}})
			{
				print "- Name: [".$anvil_name."], UUID: [".$anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_uuid}."]\n";
			}
			$anvil->nice_exit({exit_code => 0});
		}

		# Make sure the anvil is valid.
		my ($anvil_name, $anvil_uuid) = $anvil->Get->anvil_from_switch({
			debug => 2,
			anvil => $anvil->data->{switches}{anvil},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			's1:anvil_name' => $anvil_name,
			's2:anvil_uuid' => $anvil_uuid,
		}});

		if (not $anvil_name)
		{
			# Bad name.
			print "[ Error ] - Unable to get the Anvil! name and UUID from the string: [".$anvil->data->{switches}{anvil}."]\n";
			$anvil->nice_exit({exit_code => 1});
		}

		# Add the host_uuids of both nodes to the collect_from array.
		$queue_host->($anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid});
		$queue_host->($anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid});
	}

	if ($anvil->data->{switches}{hosts})
	{
		if ($anvil->data->{switches}{hosts} eq "#!SET!#")
		{
			# Show a list of all machines.
			print "Available Anvil! cluster systems. Use '--host <comma-separated list of names or UUIDs>' to collect data from specific hosts.\n";
			foreach my $host_type ("striker", "node", "dr")
			{
				print "- Striker Dashboards:\n"        if $host_type eq "striker";
				print "\n- Anvil! sub-nodes:\n"        if $host_type eq "node";
				print "\n- Disaster recovery hosts:\n" if $host_type eq "dr";
				foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
				{
					my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
					my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						's1:host_name'      => $host_name,
						's2:host_uuid'      => $host_uuid,
						's3:this_host_type' => $this_host_type,
					}});

					# A host without a recorded type can't belong to any of the groups.
					next if not defined $this_host_type;
					next if $host_type ne $this_host_type;

					print " - Host: [".$host_name."], UUID: [".$host_uuid."]\n";
				}
			}

			$anvil->nice_exit({exit_code => 0});
		}

		foreach my $entry (split/,/, $anvil->data->{switches}{hosts})
		{
			# Tolerate 'a, b' and stray commas; work on a copy so the split result isn't altered.
			my $host = $entry;
			   $host =~ s/^\s+//;
			   $host =~ s/\s+$//;
			next if $host eq "";

			# Make sure this host is valid.
			my ($host_uuid) = $anvil->Database->get_host_uuid_from_string({string => $host});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:host'      => $host,
				's2:host_uuid' => $host_uuid,
			}});
			if (not $host_uuid)
			{
				print "[ Error ] - Unable to get the host UUID from the host string: [".$host."]\n";
				$anvil->nice_exit({exit_code => 1});
			}
			$queue_host->($host_uuid);
		}
	}

	# If we were restricted to an anvil or host, make sure we've added the Strikers.
	if (($anvil->data->{switches}{anvil}) or ($anvil->data->{switches}{hosts}))
	{
		foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:host_name'      => $host_name,
				's2:host_uuid'      => $host_uuid,
				's3:this_host_type' => $this_host_type,
			}});
			next if ((not defined $this_host_type) or ($this_host_type ne "striker"));

			# The helper ignores Strikers that were already selected explicitly.
			my $added = $queue_host->($host_uuid);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { added => $added }});
		}
	}

	return(0);
}
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue