This adds the new 'striker-collect-debug' tool that collects all potentially useful debug info into a single tarball.
* Fixed a bug in Get->anvil_from_switch() to work when the Anvil! name is passed. Signed-off-by: digimer <mkelly@alteeve.ca>main
parent
bf1ccc8bee
commit
a7ebe45f76
9 changed files with 1037 additions and 121 deletions
@ -0,0 +1,41 @@ |
|||||||
|
.\" Manpage for the Anvil! machine power and access reporting tool. |
||||||
|
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions. |
||||||
|
.TH striker-collect-data "8" "July 04 2023" "Anvil! Intelligent Availability™ Platform" |
||||||
|
.SH NAME |
||||||
|
striker-collect-data \- This program collects data needed to help diagnose problems with an Anvil! system. |
||||||
|
.SH SYNOPSIS |
||||||
|
.B striker-collect-data |
||||||
|
\fI\,<command> \/\fR[\fI\,options\/\fR] |
||||||
|
.SH DESCRIPTION |
||||||
|
This program collects database data, logs, config files and other information needed to help diagnose problems with the Anvil! platform. By default, this collects all data from all accessible machines. |
||||||
|
.TP |
||||||
|
.B Note: |
||||||
|
.TP |
||||||
|
This program collects potentially secure information, like passwords. Be careful who you share the collected data with! |
||||||
|
.TP |
||||||
|
The data from Striker dashboards are always collected. |
||||||
|
.TP |
||||||
|
.SH OPTIONS |
||||||
|
.TP |
||||||
|
\-?, \-h, \fB\-\-help\fR |
||||||
|
Show this man page. |
||||||
|
.TP |
||||||
|
\fB\-\-log-secure\fR |
||||||
|
When logging, record sensitive data, like passwords. |
||||||
|
.TP |
||||||
|
\-v, \-vv, \-vvv |
||||||
|
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data. |
||||||
|
.SS "Commands:" |
||||||
|
.TP |
||||||
|
\fB\-\-anvil\fR <name or UUID> |
||||||
|
.TP |
||||||
|
This restricts the data to be collected to the Striker dashboards and the specific Anvil! node pair. |
||||||
|
.TP |
||||||
|
\fB\-\-hosts\fR <comma-separated list of host names or UUIDs> |
||||||
|
.TP |
||||||
|
This can be used to specify which specific hosts data is collected from. Note that this can be used in conjuction with \fB\-\-anvil\fR to add additional hosts to collect data from, like DR hosts. |
||||||
|
.IP |
||||||
|
.SH AUTHOR |
||||||
|
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. |
||||||
|
.SH "REPORTING BUGS" |
||||||
|
Report bugs to users@clusterlabs.org |
@ -0,0 +1,737 @@ |
|||||||
|
#!/usr/bin/perl |
||||||
|
# |
||||||
|
# This program will collect data from all accessible machines and compile it into a common tarball. This is |
||||||
|
# designed to make it easier to diagnose faults. |
||||||
|
# |
||||||
|
# Exit codes; |
||||||
|
# 0 = Normal exit. |
||||||
|
# 1 = No database connection. |
||||||
|
# |
||||||
|
# TODO: |
||||||
|
# |
||||||
|
# USAGE: |
||||||
|
# |
||||||
|
|
||||||
|
use strict; |
||||||
|
use warnings; |
||||||
|
use Anvil::Tools; |
||||||
|
require POSIX; |
||||||
|
use Term::Cap; |
||||||
|
use Text::Diff; |
||||||
|
use Data::Dumper; |
||||||
|
|
||||||
|
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; |
||||||
|
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; |
||||||
|
if (($running_directory =~ /^\./) && ($ENV{PWD})) |
||||||
|
{ |
||||||
|
$running_directory =~ s/^\./$ENV{PWD}/; |
||||||
|
} |
||||||
|
|
||||||
|
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. |
||||||
|
$| = 1; |
||||||
|
|
||||||
|
my $anvil = Anvil::Tools->new(); |
||||||
|
|
||||||
|
### TODO: Remove this before final release |
||||||
|
$anvil->Log->level({set => 2}); |
||||||
|
$anvil->Log->secure({set => 1}); |
||||||
|
########################################## |
||||||
|
|
||||||
|
# Read switches (target ([user@]host[:port]) and the file with the target's password. |
||||||
|
$anvil->Get->switches({list => ["anvil", "hosts"], man => $THIS_FILE}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}}); |
||||||
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }}); |
||||||
|
|
||||||
|
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks |
||||||
|
# is to setup the database server. |
||||||
|
$anvil->Database->connect(); |
||||||
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); |
||||||
|
if (not $anvil->data->{sys}{database}{connections}) |
||||||
|
{ |
||||||
|
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try |
||||||
|
# again after we exit. |
||||||
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0305"}); |
||||||
|
sleep 10; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
|
||||||
|
# Make sure we're running as 'root' |
||||||
|
# $< == real UID, $> == effective UID |
||||||
|
if (($< != 0) && ($> != 0)) |
||||||
|
{ |
||||||
|
# Not root |
||||||
|
print $anvil->Words->string({key => "error_0005"})."\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
|
||||||
|
# Make sure we're a striker. |
||||||
|
if ($anvil->Get->host_type ne "striker") |
||||||
|
{ |
||||||
|
print "This has to be run on a Striker dashboard.\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
|
||||||
|
print "Data collection has begun.\n"; |
||||||
|
print "Depending on how many systems we're collecting from, this could take a while.\n"; |
||||||
|
|
||||||
|
process_switches($anvil); |
||||||
|
|
||||||
|
collect_data($anvil); |
||||||
|
|
||||||
|
# Create the tarball now. |
||||||
|
print "Data collection complete, creating the tarball now... "; |
||||||
|
my $tarball = "/root/anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2"; |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{tar}." -cvjf ".$tarball." ".$anvil->data->{sys}{compile_directory}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:tarball' => $tarball, |
||||||
|
's2:shell_call' => $shell_call, |
||||||
|
}}); |
||||||
|
|
||||||
|
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
print "\n[ Complete ] - The debug data is here: [".$tarball."]\n"; |
||||||
|
print "[ Warning ] - The collected logs likely include sensitive information! Share is carefully!\n"; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
$anvil->nice_exit({exit_code => 0}); |
||||||
|
|
||||||
|
|
||||||
|
############################################################################################################# |
||||||
|
# Functions # |
||||||
|
############################################################################################################# |
||||||
|
|
||||||
|
sub collect_data |
||||||
|
{ |
||||||
|
my ($anvil) = @_; |
||||||
|
|
||||||
|
# Make sure the collection directory exists. |
||||||
|
$anvil->data->{sys}{date_and_time} = $anvil->Get->date_and_time({file_name => 1}); |
||||||
|
$anvil->data->{sys}{compile_directory} = "/tmp/anvil-debug_".$anvil->data->{sys}{date_and_time}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
"sys::date_and_time" => $anvil->data->{sys}{date_and_time}, |
||||||
|
"sys::compile_directory" => $anvil->data->{sys}{compile_directory}, |
||||||
|
}}); |
||||||
|
|
||||||
|
my $failed = $anvil->Storage->make_directory({directory => $anvil->data->{sys}{compile_directory}}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }}); |
||||||
|
if ($failed) |
||||||
|
{ |
||||||
|
print "Failed to create the directory: [".$anvil->data->{sys}{compile_directory}."]. The error should be logged.\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
|
||||||
|
my $hosts = @{$anvil->data->{collect_from}}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts => $hosts }}); |
||||||
|
foreach my $host_type ("striker", "node", "dr") |
||||||
|
{ |
||||||
|
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}}) |
||||||
|
{ |
||||||
|
my $host_uuid = $anvil->data->{sys}{hosts}{by_name}{$host_name}; |
||||||
|
my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name}; |
||||||
|
my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:host_name' => $host_name, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
's3:short_host_name' => $short_host_name, |
||||||
|
's4:this_host_type' => $this_host_type, |
||||||
|
}}); |
||||||
|
next if $host_type ne $this_host_type; |
||||||
|
|
||||||
|
# Are we collecting from a subset only? |
||||||
|
if ($hosts) |
||||||
|
{ |
||||||
|
# Yes, is this host one of them? |
||||||
|
my $found = 0; |
||||||
|
foreach my $this_host_uuid (@{$anvil->data->{collect_from}}) |
||||||
|
{ |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
host_uuid => $host_uuid, |
||||||
|
this_host_uuid => $this_host_uuid, |
||||||
|
}}); |
||||||
|
if ($this_host_uuid eq $host_uuid) |
||||||
|
{ |
||||||
|
$found = 1; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { found => $found }}); |
||||||
|
last; |
||||||
|
} |
||||||
|
} |
||||||
|
next if not $found; |
||||||
|
} |
||||||
|
|
||||||
|
# Make sure there's a directory for this host. |
||||||
|
my $target_directory = $anvil->data->{sys}{compile_directory}."/".$short_host_name; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target_directory => $target_directory }}); |
||||||
|
if (not -d $target_directory) |
||||||
|
{ |
||||||
|
my $failed = $anvil->Storage->make_directory({ |
||||||
|
directory => $target_directory, |
||||||
|
mode => "777", |
||||||
|
}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }}); |
||||||
|
if ($failed) |
||||||
|
{ |
||||||
|
print "Failed to create the directory: [".$target_directory."]. The error should be logged.\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
# Is this the local host or a remote one? |
||||||
|
if ($host_uuid eq $anvil->Get->host_uuid) |
||||||
|
{ |
||||||
|
### Collecting local data. |
||||||
|
collect_local_data($anvil, $target_directory); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
# Collecting data from a remote machine |
||||||
|
my $problem = collect_remote_data($anvil, $host_uuid, $target_directory); |
||||||
|
if ($problem) |
||||||
|
{ |
||||||
|
# Create a file saying we couldn't access this machine. |
||||||
|
my $body = "No access to: [".$host_name."] found.\n"; |
||||||
|
my $file = $target_directory."/no_access.txt"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $file, |
||||||
|
body => $body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return(0); |
||||||
|
} |
||||||
|
|
||||||
|
sub collect_remote_data |
||||||
|
{ |
||||||
|
my ($anvil, $host_uuid, $target_directory) = @_; |
||||||
|
|
||||||
|
my $host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name}; |
||||||
|
my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name}; |
||||||
|
my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; |
||||||
|
my $failed_body = "File not copied from: [".$host_name."].\n"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:host_name' => $host_name, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
's3:short_host_name' => $short_host_name, |
||||||
|
's4:this_host_type' => $this_host_type, |
||||||
|
's5:target_directory' => $target_directory, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Dump the previous boot logs to a file. |
||||||
|
print "\nGrabbing logs and data from the remote system: [".$short_host_name."].\n"; |
||||||
|
print "- Testing access...\n"; |
||||||
|
my $matches = $anvil->Network->find_access({ |
||||||
|
debug => 2, |
||||||
|
target => $host_name, |
||||||
|
}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }}); |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{ip} = ""; |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{network} = ""; |
||||||
|
foreach my $preferred_network ("bcn", "mn", "ifn", "sn") |
||||||
|
{ |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }}); |
||||||
|
foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}}) |
||||||
|
{ |
||||||
|
next if $network_name !~ /^$preferred_network/; |
||||||
|
my $target_ip = $anvil->data->{network_access}{$network_name}{target_ip_address}; |
||||||
|
my $test_access = $anvil->Remote->test_access({target => $target_ip}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:network_name' => $network_name, |
||||||
|
's2:target_ip' => $target_ip, |
||||||
|
's3:test_access' => $test_access, |
||||||
|
}}); |
||||||
|
|
||||||
|
if ($test_access) |
||||||
|
{ |
||||||
|
# We're good. |
||||||
|
print "- Found access over the network: [".$network_name."] using the target IP: [".$target_ip."]\n"; |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{ip} = $target_ip; |
||||||
|
$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
"s1:peer::${short_host_name}::access::ip" => $anvil->data->{peer}{$short_host_name}{access}{ip}, |
||||||
|
"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network}, |
||||||
|
}}); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (not $anvil->data->{peer}{$short_host_name}{access}{ip}) |
||||||
|
{ |
||||||
|
print "No access!!\n"; |
||||||
|
print "- Not able to collect data from this host, skipping.\n"; |
||||||
|
return(1); |
||||||
|
} |
||||||
|
|
||||||
|
print "- Writing out system logs from the previous boot... "; |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > /tmp/journalctl-previous-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
my ($output, $error, $return_code) = $anvil->Remote->call({ |
||||||
|
shell_call => $shell_call, |
||||||
|
target => $anvil->data->{peer}{$short_host_name}{access}{ip}, |
||||||
|
}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
error => $error, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Copying the file |
||||||
|
print "Done! Copying to here... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/tmp/journalctl-previous-boot.log", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
my $test_file = $target_directory."/tmp/journalctl-previous-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
# Dump the current boot logs |
||||||
|
print "- Grabbing system logs from this boot... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Copying the file |
||||||
|
print "Done! Copying to here... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/tmp/journalctl-current-boot.log", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
$test_file = $target_directory."/journalctl-current-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
# If we're a striker, dump the database also. |
||||||
|
if ($this_host_type eq "striker") |
||||||
|
{ |
||||||
|
# What's the password and address? |
||||||
|
if (not exists $anvil->data->{database}{$host_uuid}) |
||||||
|
{ |
||||||
|
# The remote striker isn't known |
||||||
|
print "- The host is a Striker, but we don't have database access info, skipping DB dump.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "- Dumping and compressing remote database data, PLEASE BE PATIENT!... "; |
||||||
|
my $pg_file = "/root/.pgpass"; |
||||||
|
my $pg_body = "*:*:*:admin:".$anvil->data->{database}{$host_uuid}{password}; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $pg_file, |
||||||
|
body => $pg_body, |
||||||
|
mode => "600", |
||||||
|
overwrite => 0, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{pg_dump}." -h ".$anvil->data->{peer}{$short_host_name}{access}{ip}." -U admin anvil 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
if ($return_code) |
||||||
|
{ |
||||||
|
# Failed |
||||||
|
print "Failed!\n"; |
||||||
|
print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n"; |
||||||
|
print "========\n"; |
||||||
|
print $output."\n"; |
||||||
|
print "========\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
unlink $pg_file; |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
print "- Grabbing hosts file... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/etc/hosts", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
$test_file = $target_directory."/hosts"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
print "- Grabbing Anvil! log... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/var/log/anvil.log", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
$test_file = $target_directory."/anvil.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
# If this is a node, grab the shared files. |
||||||
|
if ($this_host_type eq "node") |
||||||
|
{ |
||||||
|
print "- Collecting the cluster information base (CIB)... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > /tmp/cib.xml"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Copying the file |
||||||
|
print "Done! Copying to here... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/tmp/cib.xml", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
my $test_file = $target_directory."/cib.xml"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }}); |
||||||
|
if (not -e $test_file) |
||||||
|
{ |
||||||
|
print "Done.\n"; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
print "Failed!\n"; |
||||||
|
print "- For some reason, this file was not collected.\n"; |
||||||
|
$anvil->Storage->write_file({ |
||||||
|
file => $test_file, |
||||||
|
body => $failed_body, |
||||||
|
overwrite => 1, |
||||||
|
backup => 0, |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
# If this is not a striker, collect definition files. |
||||||
|
if ($this_host_type ne "striker") |
||||||
|
{ |
||||||
|
print "- Collecting server definitions... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/mnt/shared/definitions", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
print "- Collecting replicated storage config... "; |
||||||
|
$anvil->Storage->rsync({ |
||||||
|
source => "root\@".$anvil->data->{peer}{$short_host_name}{access}{ip}.":/etc/drbd.d", |
||||||
|
destination => $target_directory."/", |
||||||
|
}); |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
|
||||||
|
return(0); |
||||||
|
} |
||||||
|
|
||||||
|
sub collect_local_data |
||||||
|
{ |
||||||
|
my ($anvil, $target_directory) = @_; |
||||||
|
|
||||||
|
my $host_uuid = $anvil->Get->host_uuid(); |
||||||
|
my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:target_directory' => $target_directory, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
's3:this_host_type' => $this_host_type, |
||||||
|
}}); |
||||||
|
|
||||||
|
# Dump the previous boot logs to a file. |
||||||
|
print "\nGrabbing logs and data from the local system.\n"; |
||||||
|
print "- Grabbing system logs from the previous boot... "; |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > ".$target_directory."/journalctl-previous-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
# Dump the current boot logs |
||||||
|
print "- Grabbing system logs from this boot... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > ".$target_directory."/journalctl-current-boot.log"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
# If we're a striker, dump the database also. |
||||||
|
if ($this_host_type eq "striker") |
||||||
|
{ |
||||||
|
print "- Dumping and compressing database data, PLEASE BE PATIENT!... "; |
||||||
|
my $shell_call = $anvil->data->{path}{exe}{su}." postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." anvil\" 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
if ($return_code) |
||||||
|
{ |
||||||
|
# Failed |
||||||
|
print "Failed!\n"; |
||||||
|
print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n"; |
||||||
|
print "========\n"; |
||||||
|
print $output."\n"; |
||||||
|
print "========\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
|
||||||
|
print "- Grabbing hosts file... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{cp}." /etc/hosts ".$target_directory."/"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
print "- Grabbing Anvil! log... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{cp}." /var/log/anvil.log ".$target_directory."/"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
|
||||||
|
# If this is a node, grab the shared files. |
||||||
|
if ($this_host_type eq "node") |
||||||
|
{ |
||||||
|
print "- Collecting the cluster information base (CIB)... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > ".$target_directory."/cib.xml"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
|
||||||
|
# If this is not a striker, collect definition files. |
||||||
|
if ($this_host_type ne "striker") |
||||||
|
{ |
||||||
|
print "- Collecting server definitions... "; |
||||||
|
$shell_call = $anvil->data->{path}{exe}{rsync}." -av /mnt/shared/definitions ".$target_directory."/"; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); |
||||||
|
|
||||||
|
($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
output => $output, |
||||||
|
return_code => $return_code, |
||||||
|
}}); |
||||||
|
print "Done!\n"; |
||||||
|
} |
||||||
|
|
||||||
|
return(0); |
||||||
|
} |
||||||
|
|
||||||
|
sub process_switches |
||||||
|
{ |
||||||
|
my ($anvil) = @_; |
||||||
|
|
||||||
|
$anvil->data->{collect_from} = []; |
||||||
|
$anvil->Database->get_hosts(); |
||||||
|
|
||||||
|
if ($anvil->data->{switches}{anvil}) |
||||||
|
{ |
||||||
|
if ($anvil->data->{switches}{anvil} eq "#!SET!#") |
||||||
|
{ |
||||||
|
# Show a list of Anvil! systems. |
||||||
|
print "Available Anvil! systems. Use '--anvil <name or UUID>' to collect data from a specific Anvil! node.\n"; |
||||||
|
foreach my $anvil_name (sort {$a cmp $b} keys %{$anvil->data->{anvils}{anvil_name}}) |
||||||
|
{ |
||||||
|
print "- Name: [".$anvil_name."], UUID: [".$anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_uuid}."]\n"; |
||||||
|
} |
||||||
|
$anvil->nice_exit({exit_code => 0}); |
||||||
|
} |
||||||
|
|
||||||
|
# Make sure the anvil is valid. |
||||||
|
my ($anvil_name, $anvil_uuid) = $anvil->Get->anvil_from_switch({ |
||||||
|
debug => 2, |
||||||
|
anvil => $anvil->data->{switches}{anvil}, |
||||||
|
}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:anvil_name' => $anvil_name, |
||||||
|
's2:anvil_uuid' => $anvil_uuid, |
||||||
|
}}); |
||||||
|
|
||||||
|
if (not $anvil_name) |
||||||
|
{ |
||||||
|
# Bad name. |
||||||
|
print "[ Error ] - Unable to get the Anvil! name and UUID from the string: [".$anvil->data->{switches}{anvil}."]\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
|
||||||
|
# Add the host_uuids to the collect_from array. |
||||||
|
push @{$anvil->data->{collect_from}}, $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; |
||||||
|
push @{$anvil->data->{collect_from}}, $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; |
||||||
|
} |
||||||
|
|
||||||
|
if ($anvil->data->{switches}{hosts}) |
||||||
|
{ |
||||||
|
if ($anvil->data->{switches}{hosts} eq "#!SET!#") |
||||||
|
{ |
||||||
|
# Show a list of all machines. |
||||||
|
print "Available Anvil! cluster systems. Use '--host <comma-separated list of names or UUIDs>' to collect data from specific hosts.\n"; |
||||||
|
foreach my $host_type ("striker", "node", "dr") |
||||||
|
{ |
||||||
|
print "- Striker Dashboards:\n" if $host_type eq "striker"; |
||||||
|
print "\n- Anvil! sub-nodes:\n" if $host_type eq "node"; |
||||||
|
print "\n- Disaster recovery hosts:\n" if $host_type eq "dr"; |
||||||
|
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}}) |
||||||
|
{ |
||||||
|
my $host_uuid = $anvil->data->{sys}{hosts}{by_name}{$host_name}; |
||||||
|
my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:host_name' => $host_name, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
's3:this_host_type' => $this_host_type, |
||||||
|
}}); |
||||||
|
next if $host_type ne $this_host_type; |
||||||
|
|
||||||
|
print " - Host: [".$host_name."], UUID: [".$host_uuid."]\n"; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
$anvil->nice_exit({exit_code => 0}); |
||||||
|
} |
||||||
|
|
||||||
|
foreach my $host (split/,/, $anvil->data->{switches}{hosts}) |
||||||
|
{ |
||||||
|
# Make sure this host is valid. |
||||||
|
my ($host_uuid) = $anvil->Database->get_host_uuid_from_string({string => $host}); |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:host' => $host, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
}}); |
||||||
|
if (not $host_uuid) |
||||||
|
{ |
||||||
|
print "[ Error ] - Unable to get the host UUID from the host string: [".$host."]\n"; |
||||||
|
$anvil->nice_exit({exit_code => 1}); |
||||||
|
} |
||||||
|
push @{$anvil->data->{collect_from}}, $host_uuid; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
# If we were restricted to an anvil or host, make sure we've added the Strikers. |
||||||
|
if (($anvil->data->{switches}{anvil}) or ($anvil->data->{switches}{hosts})) |
||||||
|
{ |
||||||
|
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}}) |
||||||
|
{ |
||||||
|
my $host_uuid = $anvil->data->{sys}{hosts}{by_name}{$host_name}; |
||||||
|
my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:host_name' => $host_name, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
's3:this_host_type' => $this_host_type, |
||||||
|
}}); |
||||||
|
next if $this_host_type ne "striker"; |
||||||
|
|
||||||
|
my $seen = 0; |
||||||
|
foreach my $this_host_uuid (@{$anvil->data->{collect_from}}) |
||||||
|
{ |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
||||||
|
's1:this_host_uuid' => $this_host_uuid, |
||||||
|
's2:host_uuid' => $host_uuid, |
||||||
|
}}); |
||||||
|
if ($this_host_uuid eq $host_uuid) |
||||||
|
{ |
||||||
|
$seen = 1; |
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { seen => $seen }}); |
||||||
|
last; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (not $seen) |
||||||
|
{ |
||||||
|
push @{$anvil->data->{collect_from}}, $host_uuid; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return(0); |
||||||
|
} |
Loading…
Reference in new issue