This adds the new 'striker-collect-debug' tool that collects all potentially useful debug info into a single tarball.

* Fixed a bug in Get->anvil_from_switch() to work when the Anvil! name is passed.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 1 year ago
parent bf1ccc8bee
commit a7ebe45f76
  1. 1
      Anvil/Tools.pm
  2. 4
      Anvil/Tools/Get.pm
  3. 1
      man/Makefile.am
  4. 2
      man/striker-check-machines.8
  5. 41
      man/striker-collect-debug.8
  6. 1
      tools/Makefile.am
  7. 28
      tools/anvil-manage-dr
  8. 225
      tools/anvil-manage-server-storage
  9. 737
      tools/striker-collect-debug

@ -1282,6 +1282,7 @@ sub _set_paths
swapon => "/usr/sbin/swapon", swapon => "/usr/sbin/swapon",
sysctl => "/usr/sbin/sysctl", sysctl => "/usr/sbin/sysctl",
systemctl => "/usr/bin/systemctl", systemctl => "/usr/bin/systemctl",
tar => "/usr/bin/tar",
timeout => "/usr/bin/timeout", timeout => "/usr/bin/timeout",
touch => "/usr/bin/touch", touch => "/usr/bin/touch",
tput => "/usr/bin/tput", tput => "/usr/bin/tput",

@ -161,10 +161,10 @@ sub anvil_from_switch
"switches::anvil_uuid" => $anvil->data->{switches}{anvil_uuid}, "switches::anvil_uuid" => $anvil->data->{switches}{anvil_uuid},
}}); }});
} }
elsif (exists $anvil->data->{anvils}{anvil_uuid}{$anvil_string}) elsif (exists $anvil->data->{anvils}{anvil_name}{$anvil_string})
{ {
$anvil->data->{switches}{anvil_name} = $anvil_string; $anvil->data->{switches}{anvil_name} = $anvil_string;
$anvil->data->{switches}{anvil_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_string}{anvil_uuid}; $anvil->data->{switches}{anvil_uuid} = $anvil->data->{anvils}{anvil_name}{$anvil_string}{anvil_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"switches::anvil_name" => $anvil->data->{switches}{anvil_name}, "switches::anvil_name" => $anvil->data->{switches}{anvil_name},
"switches::anvil_uuid" => $anvil->data->{switches}{anvil_uuid}, "switches::anvil_uuid" => $anvil->data->{switches}{anvil_uuid},

@ -25,4 +25,5 @@ dist_man8_MANS = \
anvil-watch-drbd.8 \ anvil-watch-drbd.8 \
scancore.8 \ scancore.8 \
striker-check-machines.8 \ striker-check-machines.8 \
striker-collect-debug.8 \
striker-initialize-host.8 striker-initialize-host.8

@ -22,7 +22,7 @@ Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a s
.SS "Commands:" .SS "Commands:"
.TP .TP
This program takes no commands. This program takes no commands.
.TP .IP
.SH AUTHOR .SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors. Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS" .SH "REPORTING BUGS"

@ -0,0 +1,41 @@
.\" Manpage for the Anvil! debug data collection tool.
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH striker-collect-debug "8" "July 04 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
striker-collect-debug \- This program collects data needed to help diagnose problems with an Anvil! system.
.SH SYNOPSIS
.B striker-collect-debug
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This program collects database data, logs, config files and other information needed to help diagnose problems with the Anvil! platform. By default, this collects all data from all accessible machines.
.TP
.B Note:
.TP
This program collects potentially secure information, like passwords. Be careful who you share the collected data with!
.TP
The data from Striker dashboards are always collected.
.IP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-anvil\fR <name or UUID>
.TP
This restricts the data to be collected to the Striker dashboards and the specific Anvil! node pair.
.TP
\fB\-\-hosts\fR <comma-separated list of host names or UUIDs>
.TP
This can be used to specify which specific hosts data is collected from. Note that this can be used in conjunction with \fB\-\-anvil\fR to add additional hosts to collect data from, like DR hosts.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -52,6 +52,7 @@ dist_sbin_SCRIPTS = \
striker-auto-initialize-all \ striker-auto-initialize-all \
striker-boot-machine \ striker-boot-machine \
striker-check-machines \ striker-check-machines \
striker-collect-debug \
striker-db-report \ striker-db-report \
striker-db-status \ striker-db-status \
striker-file-manager \ striker-file-manager \

@ -383,6 +383,7 @@ sub sanity_check
{ {
# Is this server configured to be protected? # Is this server configured to be protected?
my $config_file = $anvil->data->{path}{directories}{drbd_resources}."/".$server_name.".res"; my $config_file = $anvil->data->{path}{directories}{drbd_resources}."/".$server_name.".res";
$config_file =~ s/\/\//\//g;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { config_file => $config_file }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { config_file => $config_file }});
if (not -e $config_file) if (not -e $config_file)
{ {
@ -398,9 +399,9 @@ sub sanity_check
variables => $variables, variables => $variables,
job_status => "failed", job_status => "failed",
}); });
}
$anvil->nice_exit({exit_code => 1}); $anvil->nice_exit({exit_code => 1});
} }
}
# If we're doing a --protect or --remove, make sure we're a node, the cluster is up, and both nodes # If we're doing a --protect or --remove, make sure we're a node, the cluster is up, and both nodes
# are ready. # are ready.
@ -533,7 +534,6 @@ sub sanity_check
# Get the Anvil! details. # Get the Anvil! details.
$anvil->Database->get_hosts(); $anvil->Database->get_hosts();
$anvil->Database->get_anvils();
$anvil->Database->get_storage_group_data({debug => 2}); $anvil->Database->get_storage_group_data({debug => 2});
$anvil->Database->get_dr_links({debug => 2}); $anvil->Database->get_dr_links({debug => 2});
@ -559,7 +559,9 @@ sub sanity_check
} }
} }
# If I don't have a dr_host_uuid yet, see which are available. If only one, use it. If two or more, tell the user they need to specify which. # If I don't have a dr_host_uuid yet, see which are available. If only one, use it. If two or more,
# and if the server is already being protected, determine which to use from its config. Otherwise,
# tell the user they need to specify which.
if (not $dr_host_uuid) if (not $dr_host_uuid)
{ {
my $dr_count = keys %{$anvil->data->{dr_links}{by_anvil_uuid}{$anvil_uuid}{dr_link_host_name}}; my $dr_count = keys %{$anvil->data->{dr_links}{by_anvil_uuid}{$anvil_uuid}{dr_link_host_name}};
@ -587,6 +589,26 @@ sub sanity_check
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dr_host_uuid => $dr_host_uuid }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dr_host_uuid => $dr_host_uuid }});
} }
} }
else
{
# Two or more. Is the server already protected? If so, try to find which DR it's
# using.
if (($anvil->data->{switches}{'connect'}) or ($anvil->data->{switches}{'disconnect'}))
{
# Read the config.
my $config_file = $anvil->data->{path}{directories}{drbd_resources}."/".$server_name.".res";
$config_file =~ s/\/\//\//g;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { config_file => $config_file }});
my $resource_config = $anvil->Storage->read_file({file => $config_file});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_config => $resource_config }});
foreach my $line (split/\n/, $resource_config)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lineg => $line }});
}
}
}
} }
# If I still don't have a DR host, fail out. # If I still don't have a DR host, fail out.

@ -516,7 +516,7 @@ sub manage_disk_add
my $next_drbd_volume = ""; my $next_drbd_volume = "";
foreach my $this_host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{drbd_node}}) foreach my $this_host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{drbd_node}})
{ {
my $host_uuid = $anvil->Get->host_uuid_from_name({debug => 2, host_name => $this_host}); my $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $this_host});
my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name}; my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:this_host' => $this_host, 's1:this_host' => $this_host,
@ -988,7 +988,6 @@ sub manage_disk_add
$shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource; $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({ ($output, $return_code) = $anvil->System->call({
debug => 2,
background => 1, background => 1,
shell_call => $shell_call, shell_call => $shell_call,
}); });
@ -1002,12 +1001,20 @@ sub manage_disk_add
print "- Waiting for all peers to connect the new volume..."; print "- Waiting for all peers to connect the new volume...";
my $waiting = 1; my $waiting = 1;
my $wait_until = time + 300; my $wait_until = time + 300;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { wait_until => $wait_until }});
while ($waiting) while ($waiting)
{ {
$anvil->DRBD->get_status({debug => 2}); $anvil->DRBD->get_status({debug => 2});
my $peers_connected = 1; my $peers_connected = 1;
my $disks_ready = 0; my $disks_ready = 0;
foreach my $this_host_name (sort {$a cmp $b} keys %{$anvil->data->{peers}}) my $drbd_peer_count = keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{connection}};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_peer_count => $drbd_peer_count }});
if (not $drbd_peer_count)
{
$peers_connected = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peers_connected => $peers_connected }});
}
foreach my $this_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{connection}})
{ {
my $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $this_host_name}); my $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $this_host_name});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
@ -1039,6 +1046,7 @@ sub manage_disk_add
$anvil->data->{peers}{$short_host_name}{role} = $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role}; $anvil->data->{peers}{$short_host_name}{role} = $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"peers::${short_host_name}::disk_state" => $anvil->data->{peers}{$short_host_name}{disk_state}, "peers::${short_host_name}::disk_state" => $anvil->data->{peers}{$short_host_name}{disk_state},
"peers::${short_host_name}::role" => $anvil->data->{peers}{$short_host_name}{role},
}}); }});
foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{connection}}) foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{connection}})
{ {
@ -1054,12 +1062,12 @@ sub manage_disk_add
if (lc($replication_state) ne "established") if (lc($replication_state) ne "established")
{ {
$disks_ready = 0; $disks_ready = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peers_connected => $peers_connected }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { disks_ready => $disks_ready }});
} }
if ((not $peer_disk_state) or (lc($peer_disk_state) eq "diskless")) if ((not $peer_disk_state) or (lc($peer_disk_state) eq "diskless"))
{ {
$disks_ready = 0; $disks_ready = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peers_connected => $peers_connected }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { disks_ready => $disks_ready }});
} }
$anvil->data->{peers}{$peer_name}{disk_state} = $peer_disk_state; $anvil->data->{peers}{$peer_name}{disk_state} = $peer_disk_state;
@ -1070,7 +1078,7 @@ sub manage_disk_add
} }
} }
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peers_connected => $peers_connected }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { disks_ready => $disks_ready }});
if ($disks_ready) if ($disks_ready)
{ {
$waiting = 0; $waiting = 0;
@ -1118,20 +1126,28 @@ sub manage_disk_add
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." primary ".$drbd_resource." --force"; my $shell_call = $anvil->data->{path}{exe}{drbdadm}." primary ".$drbd_resource." --force";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
print Dumper %{$anvil->data->{peers}};
die;
# Which node should be forced primary? # Which node should be forced primary?
my $already_primary = 1;
if (not $primary_on_host) if (not $primary_on_host)
{ {
# We'll make it primary. # We'll make it primary.
$primary_on_host = $short_host_name; $primary_on_host = $short_host_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { primary_on_host => $primary_on_host }}); $already_primary = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
primary_on_host => $primary_on_host,
already_primary => $already_primary,
}});
} }
my $primary_on_host_uuid = $anvil->Get->host_uuid_from_name({debug => 2, host_name => $primary_on_host}); my $primary_on_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $primary_on_host});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { primary_on_host_uuid => $primary_on_host_uuid }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { primary_on_host_uuid => $primary_on_host_uuid }});
if ($primary_on_host_uuid eq $anvil->Get->host_uuid) if ($primary_on_host_uuid eq $anvil->Get->host_uuid)
{ {
print "- Forcing primary locally... "; print "- Forcing primary locally... ";
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); my ($output, $return_code) = $anvil->System->call({debug => 2, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output, output => $output,
return_code => $return_code, return_code => $return_code,
@ -1166,6 +1182,10 @@ sub manage_disk_add
{ {
my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip}; my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network}; my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
use_ip => $use_ip,
use_network => $use_network,
}});
print " - The resource is primary on the peer: [".$short_host_name."], forcing primary there via: [".$use_ip." (".$use_network.")]"; print " - The resource is primary on the peer: [".$short_host_name."], forcing primary there via: [".$use_ip." (".$use_network.")]";
my ($output, $error, $return_code) = $anvil->Remote->call({ my ($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call, shell_call => $shell_call,
@ -1210,53 +1230,123 @@ sub manage_disk_add
print "Initial sync does not appear to be required.\n"; print "Initial sync does not appear to be required.\n";
} }
=cut # Is the server running? If so, where.
my $startup_needed = 1; print "- Ready to add the new disk. Checking if the server is running...\n";
my $local_role = defined $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} : ""; my $server_host = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_role => $local_role }});
# Create the DRBD metadata. For this, we don't fail.
foreach my $host_type ("node", "dr") foreach my $host_type ("node", "dr")
{ {
foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}}) foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}})
{ {
my $host_uuid = $anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}{$short_host_name}{host_uuid}; my $host_uuid = $anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}{$short_host_name}{host_uuid};
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." --force create-md --max-peers=3 ".$drbd_resource."/".$next_drbd_volume; my $shell_call = $anvil->data->{path}{exe}{'anvil-special-operations'}." --task refresh-drbd-resource --resource ".$drbd_resource.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:short_host_name' => $short_host_name, 's1:short_host_name' => $short_host_name,
's2:host_uuid' => $host_uuid, 's2:host_uuid' => $host_uuid,
's7:shell_call' => $shell_call, 's3:shell_call' => $shell_call,
}}); }});
if ($host_uuid eq $anvil->Get->host_uuid) if ($host_uuid eq $anvil->Get->host_uuid)
{ {
print " - Creating the new local LV: [".$backing_disk."]..."; $anvil->Server->find();
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__}); }
else
{
my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
$anvil->Server->find({target => $use_ip });
}
if ((exists $anvil->data->{server}{location}{$server_name}) &&
($anvil->data->{server}{location}{$server_name}{host_name}))
{
my $this_host = defined $anvil->data->{server}{location}{$server_name}{host_name} ? $anvil->data->{server}{location}{$server_name}{host_name} : "";
my $server_status = defined $anvil->data->{server}{location}{$server_name}{status} ? $anvil->data->{server}{location}{$server_name}{status} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output, this_host => $this_host,
return_code => $return_code, server_status => $server_status,
}}); }});
if ($return_code) if ($server_status eq "running")
{ {
print " Error!\n"; # Found it.
print "[ FAILED ] - When trying to create the new local logical volume: [".$backing_disk."]\n"; $server_host = $this_host;
print "[ FAILED ] - using the command: [".$shell_call."]\n"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_host => $server_host }});
print "[ FAILED ] - The return code: [".$return_code."] was received, expected '0'. Output, if any:\n"; if ($host_uuid eq $anvil->Get->host_uuid)
print "==========\n"; {
print $output."\n"; print "- The server is running on this host, we'll attach the disk here.\n";
print "==========\n";
print "The creation of the new replicatedd disk is incomplete, manual intervention is required!!\n";
$anvil->nice_exit({exit_code => 1});
} }
else else
{ {
print "- The server is running on: [".$server_host."], we'll attach the disk there.\n";
}
last;
}
}
}
}
my $offline = 0;
if (not $server_host)
{
print "- The server isn't running anywhere, we'll attach the disk offline on this host.\n";
$server_host = $short_host_name;
$offline = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server_host => $server_host,
offline => $offline,
}});
}
# Build the 'virsh attach-disk' call used to add the new DRBD volume to the server. Every fragment except
# the last must end with a space so that the switches stay separated once the pieces are concatenated;
# without the space after the IO policy, virsh would be handed '--io <policy>--sourcetype' as one word.
$shell_call = $anvil->data->{path}{exe}{setsid}." --wait ".$anvil->data->{path}{exe}{virsh}." attach-disk ".$server_name." ";
$shell_call .= "/dev/drbd/by-res/".$drbd_resource."/".$next_drbd_volume." ".$new_device_target." ";
$shell_call .= "--persistent --targetbus ".$disk_device_bus." ";
$shell_call .= "--cache ".$disk_cache." ";
$shell_call .= "--io ".$disk_io_policy." ";
$shell_call .= "--sourcetype block --subdriver raw";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
if ($offline)
{
# Define the VM, if needed, then add the drive, dump the config and push it out.
print "- Defining the server: [".$server_name."] to prepare for 'virsh' modification of the server.\n";
update_definition($anvil, "define", "");
# Update the definition.
print "- Adding the drive to the definition now.\n";
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
print "- Updating the stored definition and undefining the server now...\n";
update_definition($anvil, "undefine", "");
print "Done!\n"; print "Done!\n";
$anvil->nice_exit({exit_code => 0});
} }
else
{
# Add the drive live, dump the new definition and push it out.
my $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $server_host});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_uuid => $host_uuid }});
if ($host_uuid eq $anvil->Get->host_uuid)
{
# Do the add here.
print "- Adding the drive to the server directly...\n";
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
print "- Updating the stored definition and undefining the server now...\n";
update_definition($anvil, "undefine", "");
print "Done!\n";
$anvil->nice_exit({exit_code => 0});
} }
else else
{ {
# Do the add on the target.
my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip}; my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { use_ip => $use_ip }});
print " - Creating the new LV on the peer: [".$short_host_name.":".$backing_disk."], via: [".$use_ip." (".$use_network.")]"; print " - Adding the drivve to the server using its host: [".$server_host."] via: [".$use_ip."]...\n";
my ($output, $error, $return_code) = $anvil->Remote->call({ my ($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call, shell_call => $shell_call,
target => $use_ip, target => $use_ip,
@ -1266,28 +1356,45 @@ sub manage_disk_add
error => $error, error => $error,
return_code => $return_code, return_code => $return_code,
}}); }});
if ($return_code)
# Get the updated definition file.
my $shell_call = $anvil->data->{path}{exe}{setsid}." --wait ".$anvil->data->{path}{exe}{virsh}." dumpxml --inactive ".$server_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
(my $virsh_definition, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call,
target => $use_ip,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
virsh_definition => $virsh_definition,
error => $error,
return_code => $return_code,
}});
# Make sure the $output is valid XML.
my $problem = $anvil->Server->parse_definition({
server => $server_name,
source => "from_virsh",
definition => $virsh_definition,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if ($problem)
{ {
print " Error!\n"; # Failed?!
print "[ FAILED ] - When trying to create the peer's logical volume: [".$backing_disk."]\n"; print "
print "[ FAILED ] - using the command: [".$shell_call."]\n"; [ Error ] - The definition file read from the remote host appears to be invalid after trying to attach the
print "[ FAILED ] - The return code: [".$return_code."] was received, expected '0'. Output, if any:\n"; [ Error ] - disk! It is unsafe to update the on disk and in DB definition. It's likely the attach has failed.
print "==] STDOUT [========\n"; [ Error ] - Manual update to the server is likely required now.
print $output."\n"; ";
print "==] STDERR [========\n";
print $error."\n";
print "====================\n";
print "The creation of the new replicated disk is incomplete, manual intervention is required!!\n";
$anvil->nice_exit({exit_code => 1}); $anvil->nice_exit({exit_code => 1});
} }
else
{ print "- Updating the stored definition and undefining the server now...\n";
update_definition($anvil, "undefine", $virsh_definition);
print "Done!\n"; print "Done!\n";
$anvil->nice_exit({exit_code => 0});
} }
} }
}
}
=cut
return(0); return(0);
} }
@ -1695,7 +1802,7 @@ sub manage_optical
if (not -f $iso) if (not -f $iso)
{ {
print "[ Error ] - The target: [".$iso."] doesn't exist, can't insert it into the optical drive.\n"; print "[ Error ] - The target: [".$iso."] doesn't exist, can't insert it into the optical drive.\n";
update_definition($anvil, "undefine"); update_definition($anvil, "undefine", "");
$anvil->nice_exit({exit_code => 1}); $anvil->nice_exit({exit_code => 1});
} }
else else
@ -1707,7 +1814,7 @@ sub manage_optical
# If the server is running, update the on-disk and in-DB definition. # If the server is running, update the on-disk and in-DB definition.
print "Defining the server: [".$server_name."] to prepare for 'virsh' modification of the server.\n"; print "Defining the server: [".$server_name."] to prepare for 'virsh' modification of the server.\n";
update_definition($anvil, "define"); update_definition($anvil, "define", "");
# Now we can modify the server using virsh. # Now we can modify the server using virsh.
if ($anvil->data->{switches}{insert}) if ($anvil->data->{switches}{insert})
@ -1742,7 +1849,7 @@ sub manage_optical
print "'virsh' Output: [".$output."]\n"; print "'virsh' Output: [".$output."]\n";
print "Updating the stored definition and undefining the server now:\n"; print "Updating the stored definition and undefining the server now:\n";
update_definition($anvil, "undefine"); update_definition($anvil, "undefine", "");
print "Done!\n"; print "Done!\n";
return(0); return(0);
@ -1929,7 +2036,7 @@ sub check_drbd_peer_access
my $all_online = 1; my $all_online = 1;
foreach my $this_host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{drbd_node}}) foreach my $this_host (sort {$a cmp $b} keys %{$anvil->data->{drbd}{drbd_node}})
{ {
my $host_uuid = $anvil->Get->host_uuid_from_name({debug => 2, host_name => $this_host}); my $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $this_host});
my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name}; my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:this_host' => $this_host, 's1:this_host' => $this_host,
@ -2284,8 +2391,11 @@ sub validate_server
# Update the definition on disk and in the DB, and define or undefine if requested. # Update the definition on disk and in the DB, and define or undefine if requested.
sub update_definition sub update_definition
{ {
my ($anvil, $task) = @_; my ($anvil, $task, $definition) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { task => $task }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
task => $task,
definition => $definition,
}});
my $short_host_name = $anvil->Get->short_host_name; my $short_host_name = $anvil->Get->short_host_name;
my $server_name = $anvil->data->{switches}{server_name}; my $server_name = $anvil->data->{switches}{server_name};
@ -2330,7 +2440,9 @@ sub update_definition
my $disk_definition = $anvil->data->{server}{$short_host_name}{$server_name}{from_disk}{xml} ? $anvil->data->{server}{$short_host_name}{$server_name}{from_disk}{xml} : ""; my $disk_definition = $anvil->data->{server}{$short_host_name}{$server_name}{from_disk}{xml} ? $anvil->data->{server}{$short_host_name}{$server_name}{from_disk}{xml} : "";
my $virsh_definition = $anvil->data->{server}{$short_host_name}{$server_name}{from_virsh}{xml} ? $anvil->data->{server}{$short_host_name}{$server_name}{from_virsh}{xml} : ""; my $virsh_definition = $anvil->data->{server}{$short_host_name}{$server_name}{from_virsh}{xml} ? $anvil->data->{server}{$short_host_name}{$server_name}{from_virsh}{xml} : "";
my $use_definition = ""; my $use_definition = $virsh_definition;
if (not $use_definition)
{
if (($server_running_here) or (($server_state eq "running") && ($virsh_definition))) if (($server_running_here) or (($server_state eq "running") && ($virsh_definition)))
{ {
# Get the live definition # Get the live definition
@ -2405,6 +2517,7 @@ sub update_definition
definition => $virsh_definition, definition => $virsh_definition,
}); });
} }
}
if (not $use_definition) if (not $use_definition)
{ {

@ -0,0 +1,737 @@
#!/usr/bin/perl
#
# This program will collect data from all accessible machines and compile it into a common tarball. This is
# designed to make it easier to diagnose faults.
#
# Exit codes;
# 0 = Normal exit.
# 1 = No database connection, not run as root, or not run on a Striker dashboard.
#
# TODO:
#
# USAGE:
#
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Term::Cap;
use Text::Diff;
use Data::Dumper;
# Work out this program's file name and the directory it was invoked from.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that progress messages are displayed while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();

### TODO: Remove this before final release
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
##########################################

# Read switches; '--anvil' and '--hosts' restrict which machines data is collected from.
$anvil->Get->switches({list => ["anvil", "hosts"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Connect to the database(s). The host and Anvil! lists come from the database, so we can't do anything
# useful without at least one connection.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases; report the error, pause briefly and then exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0305"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Make sure we're a striker.
if ($anvil->Get->host_type ne "striker")
{
	print "This has to be run on a Striker dashboard.\n";
	$anvil->nice_exit({exit_code => 1});
}

print "Data collection has begun.\n";
print "Depending on how many systems we're collecting from, this could take a while.\n";
process_switches($anvil);
collect_data($anvil);

# Create the tarball now. 'collect_data()' set 'sys::date_and_time' and 'sys::compile_directory'.
print "Data collection complete, creating the tarball now... ";
my $tarball    = "/root/anvil-debug_".$anvil->data->{sys}{date_and_time}.".tar.bz2";
my $shell_call = $anvil->data->{path}{exe}{tar}." -cvjf ".$tarball." ".$anvil->data->{sys}{compile_directory};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	's1:tarball'    => $tarball,
	's2:shell_call' => $shell_call, 
}});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	output      => $output,
	return_code => $return_code, 
}});

# Don't claim success unless the archive actually exists and has content.
if (not -s $tarball)
{
	print "Failed!\n";
	print "[ Error ] - The tarball: [".$tarball."] was not created. The return code from 'tar' was: [".$return_code."]. Details should be in the log.\n";
	$anvil->nice_exit({exit_code => 1});
}
if ($return_code)
{
	# The archive exists, but 'tar' reported a problem (for example, a file changed while being read),
	# so warn the user that it may be incomplete. The (verbose) output is in the log, not shown here.
	print "Done, with warnings!\n";
	print "[ Warning ] - The 'tar' call exited with the return code: [".$return_code."], the tarball may be incomplete. Details should be in the log.\n";
}
else
{
	print "Done!\n";
}

print "\n[ Complete ] - The debug data is here: [".$tarball."]\n";
print "[ Warning ] - The collected logs likely include sensitive information! Share it carefully!\n";

$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# Creates the time-stamped collection directory, then walks the known hosts (Strikers first, then nodes,
# then DR hosts) and gathers data from each one into its own sub-directory. When 'collect_from' holds host
# UUIDs, only those hosts are processed. Hosts that can't be reached get a 'no_access.txt' marker file.
# 
# Sets 'sys::date_and_time' and 'sys::compile_directory'. Always returns '0'; exits with '1' if a directory
# can't be created.
sub collect_data
{
	my ($anvil) = @_;
	
	# Everything is gathered under a time-stamped directory.
	my $time_stamp  = $anvil->Get->date_and_time({file_name => 1});
	my $compile_dir = "/tmp/anvil-debug_".$time_stamp;
	$anvil->data->{sys}{date_and_time}     = $time_stamp;
	$anvil->data->{sys}{compile_directory} = $compile_dir;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"sys::date_and_time"     => $anvil->data->{sys}{date_and_time},
		"sys::compile_directory" => $anvil->data->{sys}{compile_directory}, 
	}});
	
	my $failed = $anvil->Storage->make_directory({directory => $anvil->data->{sys}{compile_directory}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
	if ($failed)
	{
		print "Failed to create the directory: [".$anvil->data->{sys}{compile_directory}."]. The error should be logged.\n";
		$anvil->nice_exit({exit_code => 1});
	}
	
	# A non-zero count here means the user restricted collection to specific hosts.
	my $subset_size = scalar(@{$anvil->data->{collect_from}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hosts => $subset_size }});
	
	for my $host_type ("striker", "node", "dr")
	{
		for my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid       = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $host_data       = $anvil->data->{hosts}{host_uuid}{$host_uuid};
			my $short_host_name = $host_data->{short_host_name};
			my $this_host_type  = $host_data->{host_type};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'       => $host_name,
				's2:host_uuid'       => $host_uuid, 
				's3:short_host_name' => $short_host_name, 
				's4:this_host_type'  => $this_host_type, 
			}});
			next if $host_type ne $this_host_type;
			
			# When restricted to a subset, ignore hosts that weren't asked for.
			if ($subset_size)
			{
				my $wanted = 0;
				for my $candidate_uuid (@{$anvil->data->{collect_from}})
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						host_uuid      => $host_uuid,
						this_host_uuid => $candidate_uuid, 
					}});
					next if $candidate_uuid ne $host_uuid;
					
					$wanted = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { found => $wanted }});
					last;
				}
				next if not $wanted;
			}
			
			# Each host gets its own directory under the collection directory.
			my $target_directory = $anvil->data->{sys}{compile_directory}."/".$short_host_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target_directory => $target_directory }});
			unless (-d $target_directory)
			{
				my $mkdir_failed = $anvil->Storage->make_directory({
					directory => $target_directory,
					mode      => "777",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $mkdir_failed }});
				if ($mkdir_failed)
				{
					print "Failed to create the directory: [".$target_directory."]. The error should be logged.\n";
					$anvil->nice_exit({exit_code => 1});
				}
			}
			
			# The local machine is read directly, everything else is reached over the network.
			if ($host_uuid eq $anvil->Get->host_uuid)
			{
				collect_local_data($anvil, $target_directory);
				next;
			}
			
			my $problem = collect_remote_data($anvil, $host_uuid, $target_directory);
			next if not $problem;
			
			# Leave a note in the host's directory saying that we couldn't reach it.
			$anvil->Storage->write_file({
				file      => $target_directory."/no_access.txt",
				body      => "No access to: [".$host_name."] found.\n",
				overwrite => 1,
				backup    => 0,
			});
		}
	}
	
	return(0);
}
# Collects logs and configuration from a remote host into the given local directory.
# 
# Parameters;
#  $anvil            - The Anvil::Tools handle.
#  $host_uuid        - The UUID of the remote host to collect from.
#  $target_directory - The (existing) local directory to store the collected files in.
# 
# Returns '0' when the host was reachable (individual files that could not be copied are replaced by a
# placeholder file saying so), or '1' if no usable network path to the host was found. Exits with '1' if
# dumping a remote Striker's database fails.
sub collect_remote_data
{
	my ($anvil, $host_uuid, $target_directory) = @_;
	
	my $host_name       = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name};
	my $short_host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $this_host_type  = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	my $failed_body     = "File not copied from: [".$host_name."].\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:host_name'        => $host_name,
		's2:host_uuid'        => $host_uuid, 
		's3:short_host_name'  => $short_host_name, 
		's4:this_host_type'   => $this_host_type, 
		's5:target_directory' => $target_directory, 
	}});
	
	# Find a network we can reach the host over before trying to collect anything.
	print "\nGrabbing logs and data from the remote system: [".$short_host_name."].\n";
	print "- Testing access...\n";
	my $matches = $anvil->Network->find_access({
		debug  => 2, 
		target => $host_name,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { matches => $matches }});
	
	$anvil->data->{peer}{$short_host_name}{access}{ip}      = "";
	$anvil->data->{peer}{$short_host_name}{access}{network} = "";
	
	# Networks are tried in order of preference, and we stop at the first one that works.
	PREFERRED_NETWORK: foreach my $preferred_network ("bcn", "mn", "ifn", "sn")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { preferred_network => $preferred_network }});
		foreach my $network_name (sort {$a cmp $b} keys %{$anvil->data->{network_access}})
		{
			next if $network_name !~ /^$preferred_network/;
			my $target_ip   = $anvil->data->{network_access}{$network_name}{target_ip_address};
			my $test_access = $anvil->Remote->test_access({target => $target_ip});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:network_name' => $network_name, 
				's2:target_ip'    => $target_ip, 
				's3:test_access'  => $test_access, 
			}});
			
			next if not $test_access;
			
			# We're good.
			print "- Found access over the network: [".$network_name."] using the target IP: [".$target_ip."]\n";
			$anvil->data->{peer}{$short_host_name}{access}{ip}      = $target_ip;
			$anvil->data->{peer}{$short_host_name}{access}{network} = $network_name;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				"s1:peer::${short_host_name}::access::ip"      => $anvil->data->{peer}{$short_host_name}{access}{ip}, 
				"s2:peer::${short_host_name}::access::network" => $anvil->data->{peer}{$short_host_name}{access}{network}, 
			}});
			last PREFERRED_NETWORK;
		}
	}
	
	if (not $anvil->data->{peer}{$short_host_name}{access}{ip})
	{
		print "No access!!\n";
		print "- Not able to collect data from this host, skipping.\n";
		return(1);
	}
	my $peer_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
	
	# Have the remote host write out the logs from its previous boot, then copy the file here.
	print "- Writing out system logs from the previous boot... ";
	my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > /tmp/journalctl-previous-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $error, $return_code) = $anvil->Remote->call({
		shell_call => $shell_call, 
		target     => $peer_ip,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		error       => $error,
		return_code => $return_code, 
	}});
	print "Done! Copying to here... ";
	collect_remote_file($anvil, $peer_ip, "/tmp/journalctl-previous-boot.log", $target_directory, $failed_body);
	
	# Same again for the current boot. This must run on the remote host, as the file is copied from there.
	print "- Grabbing system logs from this boot... ";
	$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > /tmp/journalctl-current-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $error, $return_code) = $anvil->Remote->call({
		shell_call => $shell_call, 
		target     => $peer_ip,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		error       => $error,
		return_code => $return_code, 
	}});
	print "Done! Copying to here... ";
	collect_remote_file($anvil, $peer_ip, "/tmp/journalctl-current-boot.log", $target_directory, $failed_body);
	
	# If the remote host is a striker, dump its database also.
	if ($this_host_type eq "striker")
	{
		# What's the password and address?
		if (not exists $anvil->data->{database}{$host_uuid})
		{
			# The remote striker isn't known
			print "- The host is a Striker, but we don't have database access info, skipping DB dump.\n";
		}
		else
		{
			print "- Dumping and compressing remote database data, PLEASE BE PATIENT!... ";
			
			# 'pg_dump' reads the password from root's password file. Note whether one already 
			# exists so that we never delete a file we didn't create.
			my $pg_file    = "/root/.pgpass";
			my $pg_existed = -e $pg_file ? 1 : 0;
			my $pg_body    = "*:*:*:admin:".$anvil->data->{database}{$host_uuid}{password};
			$anvil->Storage->write_file({
				file      => $pg_file,
				body      => $pg_body,
				mode      => "600",
				overwrite => 0,
				backup    => 0,
			});
			
			# The dump is run locally against the remote database server.
			$shell_call = $anvil->data->{path}{exe}{pg_dump}." -h ".$peer_ip." -U admin anvil 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			my ($dump_output, $dump_return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $dump_output,
				return_code => $dump_return_code, 
			}});
			
			# Remove the password file we wrote before doing anything else, so that it's gone 
			# even if we exit below.
			if (not $pg_existed)
			{
				unlink $pg_file;
			}
			
			if ($dump_return_code)
			{
				# Failed
				print "Failed!\n";
				print "Expected the return code '0', but got: [".$dump_return_code."]. The error, if any, was:\n";
				print "========\n";
				print $dump_output."\n";
				print "========\n";
				$anvil->nice_exit({exit_code => 1});
			}
			print "Done!\n";
		}
	}
	
	print "- Grabbing hosts file... ";
	collect_remote_file($anvil, $peer_ip, "/etc/hosts", $target_directory, $failed_body);
	
	print "- Grabbing Anvil! log... ";
	collect_remote_file($anvil, $peer_ip, "/var/log/anvil.log", $target_directory, $failed_body);
	
	# If this is a node, grab the cluster configuration. This also has to run on the remote host.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > /tmp/cib.xml";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $error, $return_code) = $anvil->Remote->call({
			shell_call => $shell_call, 
			target     => $peer_ip,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			error       => $error,
			return_code => $return_code, 
		}});
		print "Done! Copying to here... ";
		collect_remote_file($anvil, $peer_ip, "/tmp/cib.xml", $target_directory, $failed_body);
	}
	
	# If this is not a striker, collect definition files.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		$anvil->Storage->rsync({
			source      => "root\@".$peer_ip.":/mnt/shared/definitions",
			destination => $target_directory."/",
		});
		print "Done!\n";
		
		print "- Collecting replicated storage config... ";
		$anvil->Storage->rsync({
			source      => "root\@".$peer_ip.":/etc/drbd.d",
			destination => $target_directory."/",
		});
		print "Done!\n";
	}
	
	return(0);
}

# Copies a single file from a remote host into the target directory and reports the result to the user.
# If the file did not arrive, a placeholder file with the given body is written in its place.
# 
# Parameters;
#  $anvil            - The Anvil::Tools handle.
#  $target_ip        - The IP address to reach the remote host at.
#  $remote_file      - The full path to the file on the remote host.
#  $target_directory - The local directory to copy the file into.
#  $failed_body      - The body to write to the placeholder file if the copy fails.
# 
# Returns '0' if the file was copied, '1' if not.
sub collect_remote_file
{
	my ($anvil, $target_ip, $remote_file, $target_directory, $failed_body) = @_;
	
	# rsync drops the file directly into the target directory, so only the file name matters locally.
	my $file_name = ($remote_file =~ /^.*\/(.*)$/)[0];
	my $test_file = $target_directory."/".$file_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:remote_file' => $remote_file,
		's2:test_file'   => $test_file, 
	}});
	
	$anvil->Storage->rsync({
		source      => "root\@".$target_ip.":".$remote_file,
		destination => $target_directory."/",
	});
	
	if (-e $test_file)
	{
		print "Done.\n";
		return(0);
	}
	
	print "Failed!\n";
	print "- For some reason, this file was not collected.\n";
	$anvil->Storage->write_file({
		file      => $test_file,
		body      => $failed_body,
		overwrite => 1,
		backup    => 0,
	});
	
	return(1);
}
# Collects logs and configuration from the local machine into the given directory.
# 
# Parameters;
#  $anvil            - The Anvil::Tools handle.
#  $target_directory - The (existing) directory to store the collected files in.
# 
# Always returns '0'. Exits with '1' if the local database dump fails. The return codes of the other
# commands are logged but not acted on, so a missing source (for example, no previous boot in the journal)
# does not stop the collection.
sub collect_local_data
{
	my ($anvil, $target_directory) = @_;
	
	my $host_uuid      = $anvil->Get->host_uuid();
	my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		's1:target_directory' => $target_directory, 
		's2:host_uuid'        => $host_uuid, 
		's3:this_host_type'   => $this_host_type, 
	}});
	
	# Dump the previous boot logs to a file.
	print "\nGrabbing logs and data from the local system.\n";
	print "- Grabbing system logs from the previous boot... ";
	my $shell_call = $anvil->data->{path}{exe}{journalctl}." -b -1 > ".$target_directory."/journalctl-previous-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	print "Done!\n";
	
	# Dump the current boot logs
	print "- Grabbing system logs from this boot... ";
	$shell_call = $anvil->data->{path}{exe}{journalctl}." -b 0 > ".$target_directory."/journalctl-current-boot.log";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	print "Done!\n";
	
	# If we're a striker, dump the database also.
	if ($this_host_type eq "striker")
	{
		print "- Dumping and compressing database data, PLEASE BE PATIENT!... ";
		$shell_call = $anvil->data->{path}{exe}{su}." postgres -c \"".$anvil->data->{path}{exe}{pg_dump}." anvil\" 2>/dev/null | ".$anvil->data->{path}{exe}{bzip2}." --stdout > ".$target_directory."/anvil.out.bz2";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			return_code => $return_code, 
		}});
		if ($return_code)
		{
			# Failed
			print "Failed!\n";
			print "Expected the return code '0', but got: [".$return_code."]. The error, if any, was:\n";
			print "========\n";
			print $output."\n";
			print "========\n";
			$anvil->nice_exit({exit_code => 1});
		}
		print "Done!\n";
	}
	
	print "- Grabbing hosts file... ";
	$shell_call = $anvil->data->{path}{exe}{cp}." /etc/hosts ".$target_directory."/";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	print "Done!\n";
	
	print "- Grabbing Anvil! log... ";
	$shell_call = $anvil->data->{path}{exe}{cp}." /var/log/anvil.log ".$target_directory."/";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output,
		return_code => $return_code, 
	}});
	print "Done!\n";
	
	# If this is a node, grab the cluster configuration.
	if ($this_host_type eq "node")
	{
		print "- Collecting the cluster information base (CIB)... ";
		$shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib > ".$target_directory."/cib.xml";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			return_code => $return_code, 
		}});
		print "Done!\n";
	}
	
	# If this is not a striker, collect the server definitions and the replicated storage config. This
	# matches what is collected from remote nodes and DR hosts.
	if ($this_host_type ne "striker")
	{
		print "- Collecting server definitions... ";
		$shell_call = $anvil->data->{path}{exe}{rsync}." -av /mnt/shared/definitions ".$target_directory."/";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			return_code => $return_code, 
		}});
		print "Done!\n";
		
		print "- Collecting replicated storage config... ";
		$shell_call = $anvil->data->{path}{exe}{rsync}." -av /etc/drbd.d ".$target_directory."/";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			return_code => $return_code, 
		}});
		print "Done!\n";
	}
	
	return(0);
}
# Processes the '--anvil' and '--hosts' switches and builds the 'collect_from' array of host UUIDs.
# 
# - If either switch is used without a value, the available Anvil! systems or hosts are listed and the
#   program exits with '0'.
# - If a value can't be resolved to an Anvil! or a host, an error is printed and the program exits with '1'.
# - If either switch restricts the collection, all Striker dashboards are added to the list as well.
# - If neither switch is used, 'collect_from' is left empty.
# 
# Always returns '0'.
sub process_switches
{
	my ($anvil) = @_;
	
	$anvil->data->{collect_from} = [];
	$anvil->Database->get_hosts();
	
	if ($anvil->data->{switches}{anvil})
	{
		if ($anvil->data->{switches}{anvil} eq "#!SET!#")
		{
			# Show a list of Anvil! systems.
			print "Available Anvil! systems. Use '--anvil <name or UUID>' to collect data from a specific Anvil! node.\n";
			foreach my $anvil_name (sort {$a cmp $b} keys %{$anvil->data->{anvils}{anvil_name}})
			{
				print "- Name: [".$anvil_name."], UUID: [".$anvil->data->{anvils}{anvil_name}{$anvil_name}{anvil_uuid}."]\n";
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		# Make sure the anvil is valid.
		my ($anvil_name, $anvil_uuid) = $anvil->Get->anvil_from_switch({
			debug => 2,
			anvil => $anvil->data->{switches}{anvil},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:anvil_name' => $anvil_name,
			's2:anvil_uuid' => $anvil_uuid, 
		}});
		if (not $anvil_name)
		{
			# Bad name.
			print "[ Error ] - Unable to get the Anvil! name and UUID from the string: [".$anvil->data->{switches}{anvil}."]\n";
			$anvil->nice_exit({exit_code => 1});
		}
		
		# Add the host_uuids to the collect_from array, skipping any node that isn't recorded.
		foreach my $node_key ("anvil_node1_host_uuid", "anvil_node2_host_uuid")
		{
			my $node_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{$node_key};
			next if not $node_host_uuid;
			push @{$anvil->data->{collect_from}}, $node_host_uuid;
		}
	}
	
	if ($anvil->data->{switches}{hosts})
	{
		if ($anvil->data->{switches}{hosts} eq "#!SET!#")
		{
			# Show a list of all machines.
			print "Available Anvil! cluster systems. Use '--hosts <comma-separated list of names or UUIDs>' to collect data from specific hosts.\n";
			foreach my $host_type ("striker", "node", "dr")
			{
				print "- Striker Dashboards:\n"        if $host_type eq "striker";
				print "\n- Anvil! sub-nodes:\n"        if $host_type eq "node";
				print "\n- Disaster recovery hosts:\n" if $host_type eq "dr";
				foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
				{
					my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
					my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						's1:host_name'      => $host_name,
						's2:host_uuid'      => $host_uuid, 
						's3:this_host_type' => $this_host_type, 
					}});
					next if $host_type ne $this_host_type;
					
					print "  - Host: [".$host_name."], UUID: [".$host_uuid."]\n";
				}
			}
			$anvil->nice_exit({exit_code => 0});
		}
		
		foreach my $host (split/,/, $anvil->data->{switches}{hosts})
		{
			# Tolerate spaces around the commas and empty entries (ie: a trailing comma).
			$host =~ s/^\s+//;
			$host =~ s/\s+$//;
			next if $host eq "";
			
			# Make sure this host is valid.
			my ($host_uuid) = $anvil->Database->get_host_uuid_from_string({string => $host});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host'      => $host,
				's2:host_uuid' => $host_uuid, 
			}});
			if (not $host_uuid)
			{
				print "[ Error ] - Unable to get the host UUID from the host string: [".$host."]\n";
				$anvil->nice_exit({exit_code => 1});
			}
			push @{$anvil->data->{collect_from}}, $host_uuid;
		}
	}
	
	# If we were restricted to an anvil or host, make sure we've added the Strikers.
	if (($anvil->data->{switches}{anvil}) or ($anvil->data->{switches}{hosts}))
	{
		# Track what's already queued so that a Striker named by the user isn't added twice.
		my %queued = map { $_ => 1 } @{$anvil->data->{collect_from}};
		foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{sys}{hosts}{by_name}})
		{
			my $host_uuid      = $anvil->data->{sys}{hosts}{by_name}{$host_name};
			my $this_host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
			my $seen           = exists $queued{$host_uuid} ? 1 : 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_name'      => $host_name,
				's2:host_uuid'      => $host_uuid, 
				's3:this_host_type' => $this_host_type, 
				's4:seen'           => $seen, 
			}});
			next if $this_host_type ne "striker";
			next if $seen;
			
			push @{$anvil->data->{collect_from}}, $host_uuid;
			$queued{$host_uuid} = 1;
		}
	}
	
	return(0);
}
Loading…
Cancel
Save