You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
738 lines
26 KiB
738 lines
26 KiB
1 year ago
|
#!/usr/bin/perl
|
||
|
#
|
||
|
# This program will collect data from all accessible machines and compile it into a common tarball. This is
|
||
|
# designed to make it easier to diagnose faults.
|
||
|
#
|
||
|
# Exit codes;
|
||
|
# 0 = Normal exit.
|
||
|
# 1 = No database connection.
|
||
|
#
|
||
|
# TODO:
|
||
|
#
|
||
|
# USAGE:
|
||
|
#
|
||
|
|
||
|
use strict;
|
||
|
use warnings;
|
||
|
use Anvil::Tools;
|
||
|
require POSIX;
|
||
|
use Term::Cap;
|
||
|
use Text::Diff;
|
||
|
use Data::Dumper;
|
||
|
|
||
|
# Work out this program's name and the directory it was called from. A leading '.' in the directory is
# replaced with the PWD environment variable, when that is set.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that progress messages printed without a trailing newline show up right away.
$| = 1;

my $anvil = Anvil::Tools->new();

### TODO: Remove this before final release
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
##########################################

# Read the switches; '--anvil' and '--hosts' restrict which machines we collect from.
$anvil->Get->switches({list => ["anvil", "hosts"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Connect to the database(s). The list of hosts to collect from is read from the database, so we can't do
# anything useful without at least one connection.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases; log the error, sleep for a bit and then exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0305"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're a striker.
if ($anvil->Get->host_type ne "striker")
{
	print "This has to be run on a Striker dashboard.\n";
	$anvil->nice_exit({exit_code => 1});
}

print "Data collection has begun.\n";
print "Depending on how many systems we're collecting from, this could take a while.\n";

# Work out which hosts to collect from, then collect the data into the compile directory.
process_switches($anvil);

collect_data($anvil);

# Create the tarball now.
print "Data collection complete, creating the tarball now... ";
my $tarball    = "/root/anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
my $shell_call = $anvil->data->{path}{exe}{tar}." -cvjf ".$tarball." ".$anvil->data->{sys}{compile_directory};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	's1:tarball'    => $tarball,
	's2:shell_call' => $shell_call, 
}});

my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	output      => $output, 
	return_code => $return_code,
}});

# Don't tell the user the tarball is ready if it isn't. GNU tar uses the exit status '1' for non-fatal
# differences and '2' for fatal errors, so only a missing tarball or a status above '1' is treated as fatal.
if ((not -e $tarball) or ($return_code > 1))
{
	print "Failed!\n";
	print "[ Error ] - Failed to create the tarball: [".$tarball."]. The return code was: [".$return_code."]. The output, if any, was:\n";
	print "========\n";
	print $output."\n";
	print "========\n";
	$anvil->nice_exit({exit_code => 1});
}
print "Done!\n";

print "\n[ Complete ] - The debug data is here: [".$tarball."]\n";
print "[ Warning  ] - The collected logs likely include sensitive information! Share it carefully!\n";

$anvil->nice_exit({exit_code => 0});
|
||
|
|
||
|
|
||
|
#############################################################################################################
|
||
|
# Functions #
|
||
|
#############################################################################################################
|
||
|
|
||
|
# This creates the time-stamped compile directory, then walks the known hosts (Strikers first, then nodes,
# then DR hosts) and collects data from each into a per-host sub-directory. If 'collect_from' has entries,
# only the hosts listed there are processed. The program exits with '1' if a directory can't be created.
# 
# Parameters;
#   anvil - The Anvil::Tools handle.
# 
# Returns '0'.
sub collect_data
{
	my ($anvil) = @_;
	
	# Work out where the collected data will be staged, and make sure that directory exists.
	my $date_and_time     = $anvil->Get->date_and_time({file_name => 1});
	my $compile_directory = "/tmp/anvil-debug_".$date_and_time;
	$anvil->data->{sys}{date_and_time}     = $date_and_time;
	$anvil->data->{sys}{compile_directory} = $compile_directory;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"sys::date_and_time"     => $anvil->data->{sys}{date_and_time},
		"sys::compile_directory" => $anvil->data->{sys}{compile_directory}, 
	}});
	
	my $failed = $anvil->Storage->make_directory({directory => $anvil->data->{sys}{compile_directory}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
	if ($failed)
	{
		print "Failed to create the directory: [".$anvil->data->{sys}{compile_directory}."]. The error should be logged.\n";
		$anvil->nice_exit({exit_code => 1});
	}
	
	# A non-zero count here means the user restricted the collection to a subset of hosts.
	my $subset_count = scalar(@{$anvil->data->{collect_from}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts => $subset_count }});
	
	TYPE: for my $host_type ("striker", "node", "dr")
	{
		HOST: for my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid       = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $host_record     = $anvil->data->{hosts}{host_uuid}{$host_uuid};
			my $short_host_name = $host_record->{short_host_name};
			my $this_host_type  = $host_record->{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'       => $host_name,
				's2:host_uuid'       => $host_uuid, 
				's3:short_host_name' => $short_host_name, 
				's4:this_host_type'  => $this_host_type, 
			}});
			
			# Only handle the type of host this pass is for.
			next HOST if $host_type ne $this_host_type;
			
			# When collecting from a subset, skip any host that isn't in the list.
			if ($subset_count)
			{
				my $wanted = 0;
				for my $candidate_uuid (@{$anvil->data->{collect_from}})
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						host_uuid      => $host_uuid,
						this_host_uuid => $candidate_uuid, 
					}});
					next if $candidate_uuid ne $host_uuid;
					
					$wanted = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { found => $wanted }});
					last;
				}
				next HOST if not $wanted;
			}
			
			# Each host gets its own directory under the compile directory.
			my $target_directory = $anvil->data->{sys}{compile_directory}."/".$short_host_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target_directory => $target_directory }});
			unless (-d $target_directory)
			{
				my $mkdir_failed = $anvil->Storage->make_directory({
					directory => $target_directory,
					mode      => "777",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $mkdir_failed }});
				if ($mkdir_failed)
				{
					print "Failed to create the directory: [".$target_directory."]. The error should be logged.\n";
					$anvil->nice_exit({exit_code => 1});
				}
			}
			
			# Remote hosts are collected over the network, the local host is collected directly.
			if ($host_uuid ne $anvil->Get->host_uuid)
			{
				my $problem = collect_remote_data($anvil, $host_uuid, $target_directory);
				if ($problem)
				{
					# Leave a note in the host's directory saying we couldn't access it.
					my $note_file = $target_directory."/no_access.txt";
					my $note_body = "No access to: [".$host_name."] found.\n";
					$anvil->Storage->write_file({
						file      => $note_file, 
						body      => $note_body, 
						overwrite => 1,
						backup    => 0,
					});
				}
			}
			else
			{
				collect_local_data($anvil, $target_directory);
			}
		}
	}
	
	return(0);
}
|
||
|
|
||
|
# This collects logs and configuration data from a remote host, using remote calls and rsync, and stores them
# in the given local directory.
# 
# Parameters;
#   anvil            - The Anvil::Tools handle.
#   host_uuid        - The UUID of the host to collect from.
#   target_directory - The local directory that the collected files are written to.
# 
# Returns '0' once collection was attempted, or '1' if no working network path to the host was found. If the
# host is a Striker and dumping its database fails, the program exits with the exit code '1'.
sub collect_remote_data
{
	my ($anvil, $host_uuid, $target_directory) = @_;
	
	my $host_name       = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name};
	my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $this_host_type  = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	my $failed_body     = "File not copied from: [".$host_name."].\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:host_name'        => $host_name,
		's2:host_uuid'        => $host_uuid, 
		's3:short_host_name'  => $short_host_name, 
		's4:this_host_type'   => $this_host_type, 
		's5:target_directory' => $target_directory, 
	}});
	
	# Find a network we can reach the host over.
	print "\nGrabbing logs and data from the remote system: [".$short_host_name."].\n";
	print "- Testing access...\n";
	my $matches = $anvil->Network->find_access({
		debug  => 2,
		target => $host_name, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
	$anvil->data->{peer}{$short_host_name}{access}{ip}      = "";
	$anvil->data->{peer}{$short_host_name}{access}{network} = "";
	
	# The networks are tried in order of preference, and the search stops at the first one that works.
	# Without stopping, a later (less preferred) network would replace an earlier match, and every
	# remaining network would be tested for nothing.
	PREFERRED_NETWORK: foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
		foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
		{
			next if $network_name !~ /^$preferred_network/;
			my $target_ip   = $anvil->data->{network_access}{$network_name}{target_ip_address};
			my $test_access = $anvil->Remote->test_access({target => $target_ip});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:network_name' => $network_name, 
				's2:target_ip'    => $target_ip, 
				's3:test_access'  => $test_access, 
			}});
			
			if ($test_access)
			{
				# We're good.
				print "- Found access over the network: [".$network_name."] using the target IP: [".$target_ip."]\n";
				$anvil->data->{peer}{$short_host_name}{access}{ip}      = $target_ip;
				$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					"s1:peer::${short_host_name}::access::ip"      => $anvil->data->{peer}{$short_host_name}{access}{ip}, 
					"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network}, 
				}});
				last PREFERRED_NETWORK;
			}
		}
	}
	
	my $access_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
	if (not $access_ip)
	{
		print "No access!!\n";
		print "- Not able to collect data from this host, skipping.\n";
		return(1);
	}
	
	# Dump the previous boot logs to a file on the remote host, then copy it here.
	print "- Writing out system logs from the previous boot... ";
	_fetch_remote_file($anvil, {
		target_ip        => $access_ip, 
		shell_call       => $anvil->data->{path}{exe}{journalctl}." -b -1 > /tmp/journalctl-previous-boot.log",
		remote_file      => "/tmp/journalctl-previous-boot.log", 
		target_directory => $target_directory, 
		failed_body      => $failed_body, 
	});
	
	# Dump the current boot logs. This has to run on the remote host, as it's the remote copy of the
	# file that gets copied back.
	print "- Grabbing system logs from this boot... ";
	_fetch_remote_file($anvil, {
		target_ip        => $access_ip, 
		shell_call       => $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log",
		remote_file      => "/tmp/journalctl-current-boot.log", 
		target_directory => $target_directory, 
		failed_body      => $failed_body, 
	});
	
	# If the remote host is a striker, dump its database also.
	if ($this_host_type eq "striker")
	{
		# What's the password and address?
		if (not exists $anvil->data->{database}{$host_uuid})
		{
			# The remote striker isn't known
			print "- The host is a Striker, but we don't have database access info, skipping DB dump.\n";
		}
		else
		{
			print "- Dumping and compressing remote database data, PLEASE BE PATIENT!... ";
			
			# In a pgpass file, ':' and '\' in a field must be escaped with a backslash.
			my $pg_password =  defined $anvil->data->{database}{$host_uuid}{password} ? $anvil->data->{database}{$host_uuid}{password} : "";
			   $pg_password =~ s/([:\\])/\\$1/g;
			my $pg_file     =  "/root/.pgpass";
			my $pg_body     =  "*:*:*:admin:".$pg_password;
			
			# NOTE: With 'overwrite => 0', an existing file is not replaced here. It is still 
			#       removed after the dump, as it always has been.
			$anvil->Storage->write_file({
				file      => $pg_file, 
				body      => $pg_body, 
				mode      => "600", 
				overwrite => 0,
				backup    => 0,
			});
			
			# NOTE(review): The return code of a pipeline is that of its last command (bzip2 
			#               here), so a pg_dump failure may not be caught below - confirm.
			my $shell_call = $anvil->data->{path}{exe}{pg_dump}." -h ".$access_ip." -U admin anvil 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output, 
				return_code => $return_code,
			}});
			
			# Remove the password file before anything else, so it's not left behind if we exit.
			unlink $pg_file;
			
			if ($return_code)
			{
				# Failed
				print "Failed!\n";
				print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n";
				print "========\n";
				print $output."\n";
				print "========\n";
				$anvil->nice_exit({exit_code => 1});
			}
			print "Done!\n";
		}
	}
	
	print "- Grabbing hosts file... ";
	_fetch_remote_file($anvil, {
		target_ip        => $access_ip, 
		remote_file      => "/etc/hosts", 
		target_directory => $target_directory, 
		failed_body      => $failed_body, 
	});
	
	print "- Grabbing Anvil! log... ";
	_fetch_remote_file($anvil, {
		target_ip        => $access_ip, 
		remote_file      => "/var/log/anvil.log", 
		target_directory => $target_directory, 
		failed_body      => $failed_body, 
	});
	
	# If this is a node, dump the CIB on the remote host and copy it here.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		_fetch_remote_file($anvil, {
			target_ip        => $access_ip, 
			shell_call       => $anvil->data->{path}{exe}{pcs}." cluster cib > /tmp/cib.xml",
			remote_file      => "/tmp/cib.xml", 
			target_directory => $target_directory, 
			failed_body      => $failed_body, 
		});
	}
	
	# If this is not a striker, collect definition files.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		$anvil->Storage->rsync({
			source      => "root\@".$access_ip.":/mnt/shared/definitions",
			destination => $target_directory."/", 
		});
		print "Done!\n";
		
		print "- Collecting replicated storage config... ";
		$anvil->Storage->rsync({
			source      => "root\@".$access_ip.":/etc/drbd.d",
			destination => $target_directory."/", 
		});
		print "Done!\n";
	}
	
	return(0);
}

# This optionally runs a shell call on the remote host (to generate the file), then copies the file to the 
# local target directory with rsync and checks that it arrived. If it didn't, a placeholder file is written
# in its place. Returns '0' if the file was collected, '1' if not.
# 
# Parameters (passed as a hash reference);
#   target_ip        - The IP address used to reach the remote host.
#   shell_call       - Optional; the command to run on the remote host before copying.
#   remote_file      - The full path to the file on the remote host.
#   target_directory - The local directory to copy the file into.
#   failed_body      - The body of the placeholder file written if the copy failed.
sub _fetch_remote_file
{
	my ($anvil, $parameter) = @_;
	
	my $target_ip        = $parameter->{target_ip};
	my $remote_file      = $parameter->{remote_file};
	my $target_directory = $parameter->{target_directory};
	my $failed_body      = $parameter->{failed_body};
	my $shell_call       = defined $parameter->{shell_call} ? $parameter->{shell_call} : "";
	
	if ($shell_call ne "")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $error, $return_code) = $anvil->Remote->call({
			shell_call => $shell_call, 
			target     => $target_ip,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output, 
			error       => $error,
			return_code => $return_code,
		}});
		print "Done! Copying to here... ";
	}
	
	$anvil->Storage->rsync({
		source      => "root\@".$target_ip.":".$remote_file,
		destination => $target_directory."/", 
	});
	
	# rsync is given the directory as the destination, so the copy is checked for there under the 
	# remote file's base name.
	my $file_name = ($remote_file =~ /([^\/]+)$/)[0];
	my $test_file = $target_directory."/".$file_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }});
	if (-e $test_file)
	{
		print "Done.\n";
		return(0);
	}
	
	# The file didn't arrive, leave a note in its place.
	print "Failed!\n";
	print "- For some reason, this file was not collected.\n";
	$anvil->Storage->write_file({
		file      => $test_file, 
		body      => $failed_body, 
		overwrite => 1,
		backup    => 0,
	});
	
	return(1);
}
|
||
|
|
||
|
# This collects logs and configuration data from the local machine and stores them in the given directory.
# 
# Parameters;
#   anvil            - The Anvil::Tools handle.
#   target_directory - The directory that the collected files are written to.
# 
# Returns '0'. If this is a Striker and dumping the database fails, the program exits with the exit code '1'.
sub collect_local_data
{
	my ($anvil, $target_directory) = @_;
	
	my $host_uuid      = $anvil->Get->host_uuid();
	my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:target_directory' => $target_directory, 
		's2:host_uuid'        => $host_uuid, 
		's3:this_host_type'   => $this_host_type, 
	}});
	
	# Dump the previous boot logs to a file.
	print "\nGrabbing logs and data from the local system.\n";
	print "- Grabbing system logs from the previous boot... ";
	_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{journalctl}." -b -1 > ".$target_directory."/journalctl-previous-boot.log");
	print "Done!\n";
	
	# Dump the current boot logs
	print "- Grabbing system logs from this boot... ";
	_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{journalctl}." -b 0 > ".$target_directory."/journalctl-current-boot.log");
	print "Done!\n";
	
	# If we're a striker, dump the database also.
	if ($this_host_type eq "striker")
	{
		print "- Dumping and compressing database data, PLEASE BE PATIENT!... ";
		my $shell_call = $anvil->data->{path}{exe}{su}." postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." anvil\" 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2";
		my ($output, $return_code) = _run_local_call($anvil, __LINE__, $shell_call);
		if ($return_code)
		{
			# Failed
			print "Failed!\n";
			print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n";
			print "========\n";
			print $output."\n";
			print "========\n";
			$anvil->nice_exit({exit_code => 1});
		}
		print "Done!\n";
	}
	
	print "- Grabbing hosts file... ";
	_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{cp}." /etc/hosts ".$target_directory."/");
	print "Done!\n";
	
	print "- Grabbing Anvil! log... ";
	_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{cp}." /var/log/anvil.log ".$target_directory."/");
	print "Done!\n";
	
	# If this is a node, grab the shared files.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{pcs}." cluster cib > ".$target_directory."/cib.xml");
		print "Done!\n";
	}
	
	# If this is not a striker, collect definition files and the replicated storage config, the same 
	# as is done when collecting from a remote host.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{rsync}." -av /mnt/shared/definitions ".$target_directory."/");
		print "Done!\n";
		
		print "- Collecting replicated storage config... ";
		_run_local_call($anvil, __LINE__, $anvil->data->{path}{exe}{rsync}." -av /etc/drbd.d ".$target_directory."/");
		print "Done!\n";
	}
	
	return(0);
}

# This logs the given shell call, runs it on the local machine, logs the result and returns the output and 
# return code. The caller's line number is passed in so that the log entries point at the call site.
# 
# Parameters;
#   anvil      - The Anvil::Tools handle.
#   line       - The line number to record in the log entries.
#   shell_call - The command to run.
sub _run_local_call
{
	my ($anvil, $line, $shell_call) = @_;
	
	$anvil->Log->variables({source => $THIS_FILE, line => $line, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => $line, level => 2, list => { 
		output      => $output, 
		return_code => $return_code,
	}});
	
	return($output, $return_code);
}
|
||
|
|
||
|
# This processes the '--anvil' and '--hosts' switches and builds the 'collect_from' array of host UUIDs that
# the collection is restricted to. If neither switch is used, the array is left empty.
# 
# If either switch is used without a value, the available Anvil! systems or hosts are listed and the program
# exits with '0'. If a given Anvil! or host can't be resolved, the program exits with '1'. When the 
# collection is restricted, all Strikers are added to the list as well.
# 
# Parameters;
#   anvil - The Anvil::Tools handle.
# 
# Returns '0'.
sub process_switches
{
	my ($anvil) = @_;
	
	$anvil->data->{collect_from} = [];
	$anvil->Database->get_hosts();
	
	if ($anvil->data->{switches}{anvil})
	{
		if ($anvil->data->{switches}{anvil} eq "#!SET!#")
		{
			# Show a list of Anvil! systems.
			print "Available Anvil! systems. Use '--anvil <name or UUID>' to collect data from a specific Anvil! node.\n";
			foreach my $anvil_name (sort {$a cmp $b} keys %{$anvil->data->{anvils}{anvil_name}})
			{
				print "- Name: [".$anvil_name."], UUID: [".$anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_uuid}."]\n";
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		# Make sure the anvil is valid.
		my ($anvil_name, $anvil_uuid) = $anvil->Get->anvil_from_switch({
			debug => 2,
			anvil => $anvil->data->{switches}{anvil},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:anvil_name' => $anvil_name,
			's2:anvil_uuid' => $anvil_uuid, 
		}});
		
		if (not $anvil_name)
		{
			# Bad name.
			print "[ Error ] - Unable to get the Anvil! name and UUID from the string: [".$anvil->data->{switches}{anvil}."]\n";
			$anvil->nice_exit({exit_code => 1});
		}
		
		# Add the host_uuids to the collect_from array. A missing UUID is skipped, so that later string
		# comparisons against the list don't hit an undefined value.
		foreach my $node_key ("anvil_node1_host_uuid", "anvil_node2_host_uuid")
		{
			my $node_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{$node_key};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:node_key'       => $node_key,
				's2:node_host_uuid' => $node_host_uuid, 
			}});
			next if not $node_host_uuid;
			push @{$anvil->data->{collect_from}}, $node_host_uuid;
		}
	}
	
	if ($anvil->data->{switches}{hosts})
	{
		if ($anvil->data->{switches}{hosts} eq "#!SET!#")
		{
			# Show a list of all machines. The switch is '--hosts', so that's what the help text says.
			print "Available Anvil! cluster systems. Use '--hosts <comma-separated list of names or UUIDs>' to collect data from specific hosts.\n";
			foreach my $host_type ("striker", "node", "dr")
			{
				print "- Striker Dashboards:\n"        if $host_type eq "striker";
				print "\n- Anvil! sub-nodes:\n"        if $host_type eq "node";
				print "\n- Disaster recovery hosts:\n" if $host_type eq "dr";
				foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
				{
					my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
					my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						's1:host_name'      => $host_name,
						's2:host_uuid'      => $host_uuid, 
						's3:this_host_type' => $this_host_type, 
					}});
					next if $host_type ne $this_host_type;
					
					print "  - Host: [".$host_name."], UUID: [".$host_uuid."]\n";
				}
			}
			
			$anvil->nice_exit({exit_code => 0});
		}
		
		foreach my $host (split/,/, $anvil->data->{switches}{hosts})
		{
			# Tolerate spaces around the commas and stray commas (ie: 'a, b,').
			$host =~ s/^\s+//;
			$host =~ s/\s+$//;
			next if $host eq "";
			
			# Make sure this host is valid.
			my ($host_uuid) = $anvil->Database->get_host_uuid_from_string({string => $host});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host'      => $host,
				's2:host_uuid' => $host_uuid, 
			}});
			if (not $host_uuid)
			{
				print "[ Error ] - Unable to get the host UUID from the host string: [".$host."]\n";
				$anvil->nice_exit({exit_code => 1});
			}
			push @{$anvil->data->{collect_from}}, $host_uuid;
		}
	}
	
	# If we were restricted to an anvil or host, make sure we've added the Strikers.
	if (($anvil->data->{switches}{anvil}) or ($anvil->data->{switches}{hosts}))
	{
		foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'      => $host_name,
				's2:host_uuid'      => $host_uuid, 
				's3:this_host_type' => $this_host_type, 
			}});
			next if $this_host_type ne "striker";
			
			# Only add this Striker if it isn't already in the list.
			my $seen = grep { $_ eq $host_uuid } @{$anvil->data->{collect_from}};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { seen => $seen }});
			if (not $seen)
			{
				push @{$anvil->data->{collect_from}}, $host_uuid;
			}
		}
	}
	
	return(0);
}
|