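#!/usr/bin/perl
# 
# This program will collect data from all accessible machines and compile it into a common tarball. This is 
# designed to make it easier to diagnose faults.
# 
# Exit codes;
# 0 = Normal exit.
# 1 = No database connection, not run as 'root', not run on a Striker dashboard, an invalid switch value, or 
#     a failure while creating a directory or collecting data.
# 
# TODO: 
# 
# USAGE:
# --anvil <name or UUID>       - Only collect from the sub-nodes of this Anvil! (plus the Strikers). Given 
#                                without a value, the known Anvil! systems are listed and the program exits.
# --hosts <name or UUID,...>   - Only collect from these hosts (plus the Strikers). Given without a value, the
#                                known hosts are listed and the program exits.
# --output-file </full/path>   - Where to write the tarball. Must be a full path; '.tar.bz2' is appended if 
#                                missing. Defaults to '/root/anvil-debug_<date_and_time>.tar.bz2'.
# 
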
|
use strict; |
|
use warnings; |
|
use Anvil::Tools; |
|
require POSIX; |
|
use Term::Cap; |
|
use Text::Diff; |
|
use Data::Dumper; |
|
|
|
# Work out this program's name and the directory it was called from. If the program was called without any 
# directory component (ie: 'perl <name>'), the first match fails, so we fall back to '$0' and '.'.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE = $0 if not defined $THIS_FILE;
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();

# Read switches; '--anvil' and '--hosts' restrict which machines are collected from, and '--output-file' sets
# where the tarball is written.
$anvil->Get->switches({list => [
	"anvil", 
	"hosts", 
	"output-file"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Connect to the database(s). The host list comes from the database, so we can't do anything useful without 
# at least one connection.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, report the error, sleep for a bit and then exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0305"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're a striker.
if ($anvil->Get->host_type ne "striker")
{
	print "This has to be run on a Striker dashboard.\n";
	$anvil->nice_exit({exit_code => 1});
}

# Process the switches before anything is printed or created. When the user only asked for a list of Anvil! 
# systems or hosts, this prints the list and exits without touching the disk.
process_switches($anvil);

# Work out the name of the (temporary) collection directory.
$anvil->data->{sys}{date_and_time}     = $anvil->Get->date_and_time({file_name => 1});
$anvil->data->{sys}{compile_directory} = "/tmp/anvil-debug_".$anvil->data->{sys}{date_and_time};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	"sys::date_and_time"     => $anvil->data->{sys}{date_and_time},
	"sys::compile_directory" => $anvil->data->{sys}{compile_directory}, 
}});

print "Data collection has begun. This will take a while!\n\n";

# Get the directory portion of the output path and make sure it exists.
my $tarball = process_output($anvil);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});

collect_data($anvil);

# Create the tarball now.
# NOTE(review): The uncompressed copy in 'sys::compile_directory' is not removed afterwards - confirm whether
#               that is intentional, given that it holds the same sensitive data as the tarball.
print "\nData collection complete\n";
print "- Creating the tarball now. PLEASE BE PATIENT!... ";
my $shell_call = $anvil->data->{path}{exe}{tar}." -cvjf ".$tarball." ".$anvil->data->{sys}{compile_directory};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	's1:tarball'    => $tarball,
	's2:shell_call' => $shell_call, 
}});

my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	output      => $output,
	return_code => $return_code, 
}});
if ($return_code)
{
	# Don't claim success if 'tar' reported a problem.
	print "Failed!\n";
	print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n";
	print "========\n";
	print $output."\n";
	print "========\n";
	$anvil->nice_exit({exit_code => 1});
}
print "Done!\n";

print "\n[ Complete ] - The debug data collected here: [".$tarball."]\n";
print "[ Warning  ] - The collected data and logs likely include sensitive information! Share it carefully!\n";

$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
############################################################################################################# |
|
# Functions # |
|
############################################################################################################# |
|
|
|
# This works out the full path of the tarball to create and makes sure the directory that will hold it exists.
# 
# The default is '/root/anvil-debug_<sys::date_and_time>.tar.bz2'. If '--output-file' was used, it must be a 
# full path (start with '/'). If it ends in '/', the default file name is used inside that directory. If the 
# file name doesn't end in '.tar.bz2', that suffix is appended.
# 
# Parameters;
#  $anvil - The Anvil::Tools handle.
# 
# Returns the full path to the tarball. Exits the program with '1' if the path is not a full path or the 
# output directory can't be created.
sub process_output
{
	my ($anvil) = @_;
	
	my $tarball = "/root/anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
	if ($anvil->data->{switches}{'output-file'})
	{
		my $requested_path = $anvil->data->{switches}{'output-file'};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { requested_path => $requested_path }});
		if ($requested_path !~ /^\//)
		{
			print "[ Error ] - The output path needs to be a full path (it must start with '/').\n";
			$anvil->nice_exit({exit_code => 1});
		}
		
		# Use the requested path. The '.tar.bz2' suffix, if missing, is added below.
		$tarball = $requested_path;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});
	}
	
	# Break the directory off and make sure the output directory exists. The path always starts with '/' 
	# here, so this always matches. Splitting in one pass avoids interpolating the file name into a regex.
	my ($output_directory, $output_file) = ($tarball =~ /^(.*)\/([^\/]*)\z/s);
	
	# A file directly under the root directory leaves the directory part empty.
	$output_directory = "/" if $output_directory eq "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output_file      => $output_file,
		output_directory => $output_directory, 
	}});
	
	if (not $output_file)
	{
		# We were given a directory only, use the default file name.
		$output_file = "anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output_file => $output_file }});
	}
	elsif ($output_file !~ /\.tar\.bz2\z/)
	{
		$output_file .= ".tar.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output_file => $output_file }});
	}
	
	if ($output_directory ne "/")
	{
		print "Preparing local machine\n";
		if (not -d $output_directory)
		{
			print "- Creating the output directory: [".$output_directory."]... ";
			my $failed = $anvil->Storage->make_directory({directory => $output_directory});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
			if ($failed)
			{
				# Report the directory we actually tried to create.
				print "Failed!\nUnable to create the directory: [".$output_directory."]. The error should be logged.\n";
				$anvil->nice_exit({exit_code => 1});
			}
			print "Done!\n";
		}
		else
		{
			print "- Output directory [".$output_directory."] already exists.\n";
		}
	}
	
	# Avoid a doubled slash when the file sits directly under '/'.
	$tarball = $output_directory eq "/" ? "/".$output_file : $output_directory."/".$output_file;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tarball => $tarball }});
	
	return($tarball);
}
|
|
|
# This creates the temporary collection directory ('sys::compile_directory'), then walks the known hosts 
# (Strikers first, then nodes, then DR hosts) and gathers data from each into a per-host sub-directory. 
# 
# If 'collect_from' holds any host UUIDs, only those hosts are processed. The local host is handled by 
# 'collect_local_data()', all others by 'collect_remote_data()'. When a remote host can't be reached, a 
# 'no_access.txt' file is written into that host's directory.
# 
# Parameters;
#  $anvil - The Anvil::Tools handle.
# 
# Returns '0'. Exits the program with '1' if a directory can't be created.
sub collect_data
{
	my ($anvil) = @_;
	
	my $compile_directory = $anvil->data->{sys}{compile_directory};
	print "- Creating temporary data dir [".$compile_directory."]... ";
	my $create_failed = $anvil->Storage->make_directory({directory => $compile_directory});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $create_failed }});
	if ($create_failed)
	{
		print "Failed to create the directory: [".$compile_directory."]. The error should be logged.\n";
		$anvil->nice_exit({exit_code => 1});
	}
	print "Done!\n";
	
	# A non-zero count means we're restricted to a subset of hosts.
	my $hosts = scalar(@{$anvil->data->{collect_from}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts => $hosts }});
	for my $host_type (qw(striker node dr))
	{
		for my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid       = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
			my $this_host_type  = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'       => $host_name,
				's2:host_uuid'       => $host_uuid, 
				's3:short_host_name' => $short_host_name, 
				's4:this_host_type'  => $this_host_type, 
			}});
			
			# Only handle the hosts of the type this pass is for.
			next unless $host_type eq $this_host_type;
			
			# Are we collecting from a subset only?
			if ($hosts)
			{
				# Yes, is this host one of them?
				my $found = 0;
				for my $this_host_uuid (@{$anvil->data->{collect_from}})
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						host_uuid      => $host_uuid,
						this_host_uuid => $this_host_uuid, 
					}});
					next if $this_host_uuid ne $host_uuid;
					
					$found = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { found => $found }});
					last;
				}
				next if not $found;
			}
			
			# Make sure there's a directory for this host.
			# NOTE(review): Mode '777' on a directory that will hold logs and database dumps looks 
			#               broader than needed - confirm whether this is required.
			my $target_directory = $compile_directory."/".$short_host_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target_directory => $target_directory }});
			unless (-d $target_directory)
			{
				my $failed = $anvil->Storage->make_directory({
					directory => $target_directory,
					mode      => "777", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
				if ($failed)
				{
					print "Failed to create the directory: [".$target_directory."]. The error should be logged.\n";
					$anvil->nice_exit({exit_code => 1});
				}
			}
			
			# Is this the local host or a remote one?
			if ($host_uuid eq $anvil->Get->host_uuid)
			{
				### Collecting local data.
				collect_local_data($anvil, $target_directory);
				next;
			}
			
			# Collecting data from a remote machine
			my $problem = collect_remote_data($anvil, $host_uuid, $target_directory);
			next if not $problem;
			
			# Create a file saying we couldn't access this machine.
			$anvil->Storage->write_file({
				file      => $target_directory."/no_access.txt", 
				body      => "No access to: [".$host_name."] found.\n", 
				overwrite => 1,
				backup    => 0,
			});
		}
	}
	
	return(0);
}
|
|
|
# This collects logs and data from a remote host and stores them in the given local directory.
# 
# It first finds a network over which the host can be reached, preferring (in order) the 'bcn', 'mn', 'ifn' 
# and 'sn' networks, and falling back to any other network found. The first network that works is used. It 
# then collects the previous and current boot journals, cloud-init logs (if present), '/etc/hosts' and the 
# Anvil! log. For Strikers, the database is dumped (if access details are known). For nodes, the CIB is 
# collected. For non-Strikers, the server definitions and the DRBD config are collected.
# 
# Where a file can't be copied, a placeholder file noting this is written in its place.
# 
# Parameters;
#  $anvil            - The Anvil::Tools handle.
#  $host_uuid        - The UUID of the remote host.
#  $target_directory - The local directory to store this host's data in.
# 
# Returns '0' if collection ran, '1' if no access to the host was found. Exits the program with '1' if the 
# remote database dump fails.
sub collect_remote_data
{
	my ($anvil, $host_uuid, $target_directory) = @_;
	
	my $host_name       = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name};
	my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $this_host_type  = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	my $failed_body     = "File not copied from: [".$host_name."].\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:host_name'        => $host_name,
		's2:host_uuid'        => $host_uuid, 
		's3:short_host_name'  => $short_host_name, 
		's4:this_host_type'   => $this_host_type, 
		's5:target_directory' => $target_directory, 
	}});
	
	print "\nGrabbing logs and data from the remote system: [".$short_host_name."].\n";
	print "- Testing access...\n";
	my $matches = $anvil->Network->find_access({
		debug  => 2, 
		target => $host_name, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
	$anvil->data->{peer}{$short_host_name}{access}{ip}      = "";
	$anvil->data->{peer}{$short_host_name}{access}{network} = "";
	
	# Walk the networks in order of preference and stop at the first one that works. Networks already 
	# tested are remembered so that the 'any' pass doesn't test them a second time.
	my %tested = ();
	PREFERENCE: foreach my $preferred_network ("bcn", "mn", "ifn", "sn", "any")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
		foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { network_name => $network_name }});
			if (($preferred_network ne "any") && ($network_name !~ /^$preferred_network/))
			{
				next;
			}
			next if $tested{$network_name};
			$tested{$network_name} = 1;
			
			my $target_ip   = $anvil->data->{network_access}{$network_name}{target_ip_address};
			my $test_access = $anvil->Remote->test_access({target => $target_ip});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:network_name' => $network_name,
				's2:target_ip'    => $target_ip, 
				's3:test_access'  => $test_access, 
			}});
			next if not $test_access;
			
			# We're good.
			print "- Found access over the network: [".$network_name."] using the target IP: [".$target_ip."]\n";
			$anvil->data->{peer}{$short_host_name}{access}{ip}      = $target_ip;
			$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				"s1:peer::${short_host_name}::access::ip"      => $anvil->data->{peer}{$short_host_name}{access}{ip},
				"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network}, 
			}});
			last PREFERENCE;
		}
	}
	
	my $peer_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
	if (not $peer_ip)
	{
		print "No access!!\n";
		print "- Not able to collect data from this host, skipping.\n";
		return(1);
	}
	
	# Dump the previous boot logs to a file on the peer, then copy it here.
	print "- Writing out system logs from the previous boot... ";
	_remote_call_logged($anvil, $peer_ip, $anvil->data->{path}{exe}{journalctl}." -b -1 > /tmp/journalctl-previous-boot.log");
	print "Done! Copying to here... ";
	_remote_fetch($anvil, {
		peer_ip          => $peer_ip, 
		remote_path      => "/tmp/journalctl-previous-boot.log", 
		target_directory => $target_directory, 
		test_name        => "journalctl-previous-boot.log", 
		warning          => "  - [ Warning ] - For some reason, this file was not collected.\n", 
		failed_body      => $failed_body, 
	});
	
	# Dump the current boot logs
	print "- Grabbing system logs from this boot... ";
	_remote_call_logged($anvil, $peer_ip, $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log");
	print "Done! Copying to here... ";
	_remote_fetch($anvil, {
		peer_ip          => $peer_ip, 
		remote_path      => "/tmp/journalctl-current-boot.log", 
		target_directory => $target_directory, 
		test_name        => "journalctl-current-boot.log", 
		warning          => "- For some reason, this file was not collected.\n", 
		failed_body      => $failed_body, 
	});
	
	# Grab cloud-init data, if it exists.
	my ($cloud_init_exists) = _remote_call_logged($anvil, $peer_ip, "if [ -e /var/log/cloud-init.log ]; then echo 1; else echo 0; fi");
	if ($cloud_init_exists eq "1")
	{
		print "- Grabbing cloud-init logs... ";
		_remote_fetch($anvil, {
			peer_ip          => $peer_ip, 
			remote_path      => "/var/log/cloud-init*", 
			target_directory => $target_directory, 
			test_name        => "cloud-init.log", 
			warning          => "- For some reason, these files were not collected.\n", 
			failed_body      => $failed_body, 
		});
	}
	
	# If we're a striker, dump the database also.
	if ($this_host_type eq "striker")
	{
		# What's the password and address?
		if (not exists $anvil->data->{database}{$host_uuid})
		{
			# The remote striker isn't known
			print "- The host is a Striker, but we don't have database access info, skipping DB dump.\n";
		}
		else
		{
			print "- Dumping and compressing remote database data, PLEASE BE PATIENT!... ";
			
			# Only create (and later remove) the password file if there isn't one already, so that 
			# we never delete a file we didn't write.
			my $pg_file        = "/root/.pgpass";
			my $created_pgpass = 0;
			if (not -e $pg_file)
			{
				$anvil->Storage->write_file({
					file      => $pg_file, 
					body      => "*:*:*:admin:".$anvil->data->{database}{$host_uuid}{password}, 
					mode      => "600",
					overwrite => 0,
					backup    => 0,
				});
				$created_pgpass = 1;
			}
			
			# Dump first, then compress only if the dump succeeded. This way the return code still 
			# reflects a 'pg_dump' failure, and the resulting 'anvil.out.bz2' is really compressed.
			my $dump_file  = $target_directory."/anvil.out";
			my $shell_call = $anvil->data->{path}{exe}{pg_dump}." -h ".$peer_ip." -U admin anvil > ".$dump_file." && ".$anvil->data->{path}{exe}{bzip2}." --force ".$dump_file;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output,
				return_code => $return_code, 
			}});
			
			# Remove the password file before anything else, so it's not left behind on failure.
			unlink $pg_file if $created_pgpass;
			if ($return_code)
			{
				# Failed
				print "Failed!\n";
				print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n";
				print "========\n";
				print $output."\n";
				print "========\n";
				$anvil->nice_exit({exit_code => 1});
			}
			print "Done!\n";
		}
	}
	
	print "- Grabbing hosts file... ";
	_remote_fetch($anvil, {
		peer_ip          => $peer_ip, 
		remote_path      => "/etc/hosts", 
		target_directory => $target_directory, 
		test_name        => "hosts", 
		warning          => "- For some reason, this file was not collected.\n", 
		failed_body      => $failed_body, 
	});
	
	print "- Grabbing Anvil! log... ";
	_remote_fetch($anvil, {
		peer_ip          => $peer_ip, 
		remote_path      => "/var/log/anvil.log", 
		target_directory => $target_directory, 
		test_name        => "anvil.log", 
		warning          => "- For some reason, this file was not collected.\n", 
		failed_body      => $failed_body, 
	});
	
	# If this is a node, grab the shared files.
	if ($this_host_type eq "node")
	{
		### NOTE: The pcs wrapper was timing out when dumping the CIB, hence statically calling pcs below.
		print "- Collecting the cluster information base (CIB)... ";
		_remote_call_logged($anvil, $peer_ip, $anvil->data->{path}{exe}{pcs_direct}." cluster cib > /tmp/cib.xml");
		print "Done! Copying to here... ";
		_remote_fetch($anvil, {
			peer_ip          => $peer_ip, 
			remote_path      => "/tmp/cib.xml", 
			target_directory => $target_directory, 
			test_name        => "cib.xml", 
			warning          => "- For some reason, this file was not collected.\n", 
			failed_body      => $failed_body, 
		});
	}
	
	# If this is not a striker, collect definition files.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		$anvil->Storage->rsync({
			debug       => 2, 
			source      => "root\@".$peer_ip.":/mnt/shared/definitions",
			destination => $target_directory."/", 
		});
		print "Done!\n";
		
		print "- Collecting replicated storage config... ";
		$anvil->Storage->rsync({
			debug       => 2, 
			source      => "root\@".$peer_ip.":/etc/drbd.d",
			destination => $target_directory."/", 
		});
		print "Done!\n";
	}
	
	return(0);
}

# Private helper for collect_remote_data(); This logs and runs a shell call on the peer, logs the result and 
# returns the output, error and return code from the call.
sub _remote_call_logged
{
	my ($anvil, $peer_ip, $shell_call) = @_;
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $error, $return_code) = $anvil->Remote->call({
		shell_call => $shell_call, 
		target     => $peer_ip,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		error       => $error,
		return_code => $return_code, 
	}});
	
	return($output, $error, $return_code);
}

# Private helper for collect_remote_data(); This rsync's 'remote_path' from the peer (as 'root') into 
# 'target_directory', then checks that 'test_name' now exists there. On success, 'Done.' is printed and '0' 
# is returned. On failure, 'Failed!' and the 'warning' string are printed, a placeholder file containing 
# 'failed_body' is written in place of the missing file and '1' is returned.
sub _remote_fetch
{
	my ($anvil, $parameter) = @_;
	
	my $target_directory = $parameter->{target_directory};
	$anvil->Storage->rsync({
		debug       => 2, 
		source      => "root\@".$parameter->{peer_ip}.":".$parameter->{remote_path},
		destination => $target_directory."/", 
	});
	my $test_file = $target_directory."/".$parameter->{test_name};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }});
	if (-e $test_file)
	{
		print "Done.\n";
		return(0);
	}
	
	print "Failed!\n";
	print $parameter->{warning};
	$anvil->Storage->write_file({
		file      => $test_file, 
		body      => $parameter->{failed_body}, 
		overwrite => 1,
		backup    => 0,
	});
	
	return(1);
}
|
|
|
# This collects logs and data from the local machine and stores them in the given directory.
# 
# It collects the previous and current boot journals, '/etc/hosts', the Anvil! log and the cloud-init logs 
# (if '/var/log/cloud-init.log' exists). If this host is a Striker, the database is dumped and the server 
# screenshots are archived. If this host is a node, the CIB is collected. If this host is not a Striker, the 
# server definitions are collected.
# 
# Parameters;
#  $anvil            - The Anvil::Tools handle.
#  $target_directory - The directory to store the collected data in.
# 
# Returns '0'. Exits the program with '1' if the database dump or the screenshot archive fails.
sub collect_local_data
{
	my ($anvil, $target_directory) = @_;
	
	my $host_uuid      = $anvil->Get->host_uuid();
	my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:target_directory' => $target_directory, 
		's2:host_uuid'        => $host_uuid, 
		's3:this_host_type'   => $this_host_type, 
	}});
	
	my $journalctl = $anvil->data->{path}{exe}{journalctl};
	my $copy       = $anvil->data->{path}{exe}{cp};
	
	# Dump the previous boot logs to a file.
	print "\nGrabbing logs and data from the local system.\n";
	print "- Grabbing system logs from the previous boot... ";
	_local_call_logged($anvil, $journalctl." -b -1 > ".$target_directory."/journalctl-previous-boot.log");
	print "Done!\n";
	
	# Dump the current boot logs
	print "- Grabbing system logs from this boot... ";
	_local_call_logged($anvil, $journalctl." -b 0 > ".$target_directory."/journalctl-current-boot.log");
	print "Done!\n";
	
	# If we're a striker, dump the database also.
	if ($this_host_type eq "striker")
	{
		print "- Dumping and compressing database data, PLEASE BE PATIENT!... ";
		my ($dump_output, $dump_return_code) = _local_call_logged($anvil, $anvil->data->{path}{exe}{su}." postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." anvil\" 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2");
		_local_exit_on_failure($anvil, $dump_output, $dump_return_code);
		print "Done!\n";
		
		# Grab screenshots.
		print "- Collecting server screenshots... ";
		my $screenshot_directory = $anvil->data->{path}{directories}{screenshots};
		if (not -d $screenshot_directory)
		{
			print "Failed!\nScreenshot directory: [".$screenshot_directory."] doesn't exist, skipping.\n";
		}
		else
		{
			my ($tar_output, $tar_return_code) = _local_call_logged($anvil, $anvil->data->{path}{exe}{tar}." -cvjf ".$target_directory."/server-screenshots.bz2 ".$screenshot_directory);
			_local_exit_on_failure($anvil, $tar_output, $tar_return_code);
			print "Done!\n";
		}
	}
	
	print "- Grabbing hosts file... ";
	_local_call_logged($anvil, $copy." /etc/hosts ".$target_directory."/");
	print "Done!\n";
	
	print "- Grabbing Anvil! log... ";
	_local_call_logged($anvil, $copy." /var/log/anvil.log ".$target_directory."/");
	print "Done!\n";
	
	if (-e "/var/log/cloud-init.log")
	{
		print "- Grabbing cloud-init logs... ";
		_local_call_logged($anvil, $copy." /var/log/cloud-init* ".$target_directory."/");
		print "Done!\n";
	}
	
	# If this is a node, grab the shared files.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		_local_call_logged($anvil, $anvil->data->{path}{exe}{pcs_direct}." cluster cib > ".$target_directory."/cib.xml");
		print "Done!\n";
	}
	
	# If this is not a striker, collect definition files.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		_local_call_logged($anvil, $anvil->data->{path}{exe}{rsync}." -av /mnt/shared/definitions ".$target_directory."/");
		print "Done!\n";
	}
	
	return(0);
}

# Private helper for collect_local_data(); This logs the shell call, runs it locally, logs the output and 
# return code, and returns both.
sub _local_call_logged
{
	my ($anvil, $shell_call) = @_;
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	
	return($output, $return_code);
}

# Private helper for collect_local_data(); If the return code is non-zero, this prints the failure details 
# and exits the program with '1'. Otherwise it does nothing.
sub _local_exit_on_failure
{
	my ($anvil, $output, $return_code) = @_;
	
	return(0) if not $return_code;
	
	# Failed
	print "Failed!\n";
	print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n";
	print "========\n";
	print $output."\n";
	print "========\n";
	$anvil->nice_exit({exit_code => 1});
	
	return(1);
}
|
|
|
# This processes the '--anvil' and '--hosts' switches and builds the 'collect_from' array of host UUIDs that 
# data collection is restricted to. If neither switch is used, the array is left empty.
# 
# If either switch equals '#!SET!#', the available Anvil! systems or hosts are listed and the program exits 
# with '0'. If either switch is used to restrict collection, all Strikers are added to the list as well.
# 
# Parameters;
#  $anvil - The Anvil::Tools handle.
# 
# Returns '0'. Exits the program with '1' if the Anvil! or a host can't be identified.
sub process_switches
{
	my ($anvil) = @_;
	
	$anvil->data->{collect_from} = [];
	$anvil->Database->get_hosts();
	
	if ($anvil->data->{switches}{anvil})
	{
		if ($anvil->data->{switches}{anvil} eq "#!SET!#")
		{
			# Show a list of Anvil! systems.
			print "Available Anvil! systems. Use '--anvil <name or UUID>' to collect data from a specific Anvil! node.\n";
			foreach my $anvil_name (sort {$a cmp $b} keys %{$anvil->data->{anvils}{anvil_name}})
			{
				print "- Name: [".$anvil_name."], UUID: [".$anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_uuid}."]\n";
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		# Make sure the anvil is valid.
		my ($anvil_name, $anvil_uuid) = $anvil->Get->anvil_from_switch({
			debug => 2, 
			anvil => $anvil->data->{switches}{anvil},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:anvil_name' => $anvil_name,
			's2:anvil_uuid' => $anvil_uuid, 
		}});
		
		if (not $anvil_name)
		{
			# Bad name.
			print "[ Error ] - Unable to get the Anvil! name and UUID from the string: [".$anvil->data->{switches}{anvil}."]\n";
			$anvil->nice_exit({exit_code => 1});
		}
		
		# Add the host_uuids to the collect_from array, skipping any that aren't set so that later 
		# string comparisons against the list never see an undefined value.
		foreach my $node_key ("anvil_node1_host_uuid", "anvil_node2_host_uuid")
		{
			my $node_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{$node_key};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:node_key'       => $node_key,
				's2:node_host_uuid' => $node_host_uuid, 
			}});
			next if not $node_host_uuid;
			push @{$anvil->data->{collect_from}}, $node_host_uuid;
		}
	}
	
	if ($anvil->data->{switches}{hosts})
	{
		if ($anvil->data->{switches}{hosts} eq "#!SET!#")
		{
			# Show a list of all machines.
			print "Available Anvil! cluster systems. Use '--hosts <comma-separated list of names or UUIDs>' to collect data from specific hosts.\n";
			foreach my $host_type ("striker", "node", "dr")
			{
				print "- Striker Dashboards:\n"        if $host_type eq "striker";
				print "\n- Anvil! sub-nodes:\n"        if $host_type eq "node";
				print "\n- Disaster recovery hosts:\n" if $host_type eq "dr";
				foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
				{
					my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
					my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
					   $this_host_type = "" if not defined $this_host_type;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						's1:host_name'      => $host_name,
						's2:host_uuid'      => $host_uuid, 
						's3:this_host_type' => $this_host_type, 
					}});
					next if $host_type ne $this_host_type;
					
					print "  - Host: [".$host_name."], UUID: [".$host_uuid."]\n";
				}
			}
			
			$anvil->nice_exit({exit_code => 0});
		}
		
		foreach my $host (split/,/, $anvil->data->{switches}{hosts})
		{
			# Tolerate spaces around the commas and stray or trailing commas.
			$host =~ s/^\s+//;
			$host =~ s/\s+$//;
			next if $host eq "";
			
			# Make sure this host is valid.
			my ($host_uuid) = $anvil->Database->get_host_uuid_from_string({string => $host});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host'      => $host,
				's2:host_uuid' => $host_uuid, 
			}});
			if (not $host_uuid)
			{
				print "[ Error ] - Unable to get the host UUID from the host string: [".$host."]\n";
				$anvil->nice_exit({exit_code => 1});
			}
			push @{$anvil->data->{collect_from}}, $host_uuid;
		}
	}
	
	# If we were restricted to an anvil or host, make sure we've added the Strikers.
	if (($anvil->data->{switches}{anvil}) or ($anvil->data->{switches}{hosts}))
	{
		foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
			   $this_host_type = "" if not defined $this_host_type;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'      => $host_name,
				's2:host_uuid'      => $host_uuid, 
				's3:this_host_type' => $this_host_type, 
			}});
			next if $this_host_type ne "striker";
			
			# Don't add a Striker that the user already listed.
			my $seen = 0;
			foreach my $this_host_uuid (@{$anvil->data->{collect_from}})
			{
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					's1:this_host_uuid' => $this_host_uuid,
					's2:host_uuid'      => $host_uuid, 
				}});
				if ($this_host_uuid eq $host_uuid)
				{
					$seen = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { seen => $seen }});
					last;
				}
			}
			
			if (not $seen)
			{
				push @{$anvil->data->{collect_from}}, $host_uuid;
			}
		}
	}
	
	return(0);
}
|
|
|