Got anvil-watch-servers showing the status of subnodes.

* Updated System->maintenance_mode() to take 'host_uuid' so that the
  maintenance mode of remote machines can be checked/set.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 1 year ago
parent a8b1934352
commit 207a014ae0
  1. 23
      Anvil/Tools/System.pm
  2. 30
      man/anvil-watch-servers.8
  3. 14
      share/words.xml
  4. 1
      tools/anvil-watch-power
  5. 370
      tools/anvil-watch-servers

@ -3597,6 +3597,10 @@ This returns C<< 1 >> if maintenance mode is enabled and C<< 0 >> if disabled.
Parameters;
=head3 host_uuid (optional, default 'Get->host_uuid')
If set, this can check or set the maintenance mode on another host.
=head3 set (optional)
If this is set to C<< 1 >>, maintenance mode is enabled. If this is set to C<< 0 >>, maintenance mode is disabled.
@ -3610,8 +3614,18 @@ sub maintenance_mode
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "System->maintenance_mode()" }});
my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : "";
my $set = defined $parameter->{set} ? $parameter->{set} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { set => $set }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
host_uuid => $host_uuid,
set => $set,
}});
if (not $host_uuid)
{
$host_uuid = $anvil->Get->host_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid }});
}
if (($set) or ($set eq "0"))
{
@ -3627,7 +3641,7 @@ sub maintenance_mode
variable_default => "0",
variable_description => "striker_0087",
variable_section => "system",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_uuid => $host_uuid,
variable_source_table => "hosts",
});
}
@ -3641,7 +3655,7 @@ sub maintenance_mode
variable_default => "0",
variable_description => "striker_0087",
variable_section => "system",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_uuid => $host_uuid,
variable_source_table => "hosts",
});
}
@ -3657,10 +3671,9 @@ sub maintenance_mode
debug => $debug,
variable_name => "maintenance_mode",
variable_source_table => "hosts",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_uuid => $host_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
debug => $debug,
maintenance_mode => $maintenance_mode,
variable_uuid => $variable_uuid,
modified_date => $modified_date,

@ -0,0 +1,30 @@
.\" Manpage for the Anvil! server system manager
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-watch-servers "8" "November 27 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-watch-servers \- Tool used to watch the status of servers on an Anvil! node.
.SH SYNOPSIS
.B anvil-watch-servers
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
anvil-watch-servers \- This is designed to be run on an Anvil! node to monitor the status of servers it is hosting. It is not designed for use on Strikers or to monitor multiple nodes (yet).
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-watch\fR <interval, default '2'>
Without this switch, the state of the servers is shown as it is now, and then the program exits. With this switch, the program will stay active, refreshing every X seconds, as set with this switch. The default is to refresh every 2 seconds.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -1179,6 +1179,10 @@ resource #!variable!server!# {
<key name="header_0114">Completed Jobs</key>
<key name="header_0115">-=] DR Hosts</key>
<key name="header_0116">-=] Servers</key>
<key name="header_0117">Subnode</key>
<key name="header_0118">Host State</key>
<key name="header_0119">Pacemaker State</key>
<key name="header_0120">Maintenance Mode</key>
<!-- Strings used by jobs -->
<key name="job_0001">Configure Network</key>
@ -3202,7 +3206,7 @@ Proceed? [y/N]</key>
<key name="message_0336">- Target: [#!variable!device_target!#], boot: [#!variable!say_boot!#], Replication Volume: [#!variable!drbd_resource!#/#!variable!drbd_volume!#]</key>
<key name="message_0337"> |- Resource / LV / Metadata sizes: [#!variable!resource_size!# / #!variable!lv_size!# / #!variable!metadata_size!#], free space: [#!variable!max_free_space!#]</key>
<key name="message_0338">- Target: [#!variable!device_target!#], boot: [#!variable!say_boot!#], ISO: [#!variable!device_path!#]</key>
<key name="message_0339">Sub-Nodes:</key>
<key name="message_0339">Subnodes:</key>
<key name="message_0340">DR Hosts:</key>
<key name="message_0341"> |- Name: [#!variable!host_name!#], UUID: [#!variable!host_uuid!#]</key>
<key name="message_0342"> |- Volume: [#!variable!volume_number!#], backing device: [#!variable!backing_disk!#], DRBD minor: [#!variable!device_minor!#], size: [#!variable!volume_size!#]
@ -3622,6 +3626,14 @@ If you are comfortable that the target has changed for a known reason, you can s
<key name="striker_0304">This indicates when, in unix time, the local install target data was updated.</key>
<key name="striker_0305">This indicates when, in unix time, the OUI data was last updated. The OUI data is a list of MAC address prefixes and which companies they've been assigned to.</key>
<key name="striker_0306">This indicates when, in unix time, the network was last scanned. This is done to determine what IPs are used by servers on the Anvil! node, and to try to identify foundation pack devices on the network. These scans are simple ping sweeps used to get the MAC addresses of devices with IPs.</key>
<key name="striker_0307">Powered Off</key>
<key name="striker_0308">Online</key>
<key name="striker_0309">Stopping</key>
<key name="striker_0310">Booting</key>
<key name="striker_0311">Offline</key>
<key name="striker_0312">Transitioning</key>
<key name="striker_0313">Maintenance Mode</key>
<key name="striker_0314">Normal Operation</key>
<!-- These are generally units and appended to numbers -->
<key name="suffix_0001">#!variable!number!#/sec</key>

@ -43,6 +43,7 @@ while(1)
if ($anvil->data->{sys}{database}{connections})
{
show_power_data($anvil);
$anvil->Database->disconnect();
}
else
{

@ -0,0 +1,370 @@
#!/usr/bin/perl
use strict;
use warnings;
use Anvil::Tools;
use Data::Dumper;
use Text::Diff;
use Term::Cap;
use Time::Local;
# Disable output buffering so that status updates are shown immediately.
$| = 1;

# Work out this program's name and the directory it was run from. If '$0' has no directory component (ie:
# the program was started as 'perl anvil-watch-servers'), the name falls back to '$0' itself and the
# directory to '.', so that neither is left undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if (not defined $THIS_FILE)
{
	$THIS_FILE = $0;
}
# The file name is quoted so that any regex metacharacters in it are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

my $anvil = Anvil::Tools->new();

# Read the command line switches. The only program-specific switch is '--watch'.
$anvil->Get->switches({debug => 2, list => ["watch"]});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});

$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132" });
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, exit.
	$anvil->Log->entry({ source => $THIS_FILE, line => __LINE__, level => 0, 'print' => 1, priority => "err", key => "error_0003" });
	$anvil->nice_exit({ exit_code => 1 });
}

# Terminal capabilities object; show_status() uses it to move the cursor when in watch mode.
our $t = Term::Cap->Tgetent;

# One shot or continuous?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	'switches::watch' => $anvil->data->{switches}{watch},
}});
if ($anvil->data->{switches}{watch})
{
	# Disconnect before we go into the loop
	$anvil->Database->disconnect();
	
	# Do we have an interval? If '--watch' was not given a whole number, refresh every two seconds.
	my $interval = 2;
	if ($anvil->data->{switches}{watch} =~ /^\d+$/)
	{
		$interval = $anvil->data->{switches}{watch};
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { interval => $interval }});
	
	# Loop until terminated. The database connection is only held while the status is being shown.
	while(1)
	{
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
		if ($anvil->data->{sys}{database}{connections})
		{
			show_status($anvil);
			$anvil->Database->disconnect();
		}
		else
		{
			# No databases available.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "log_0738"});
		}
		sleep $interval;
	}
}
else
{
	# Once and exit.
	# NOTE(review): The database was already connected above, so this second connect looks redundant.
	#               Confirm before removing it.
	$anvil->Database->connect();
	show_status($anvil);
}

$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# Prints one full status report: an optional "updated" banner when in watch mode, then the servers and the
# subnodes. Exits the program with code '1' if this host is not a subnode, or if the CIB can't be parsed.
# 
# Takes the Anvil::Tools object as the only argument. Returns '0'.
sub show_status
{
	my $anvil = shift;
	
	# When watching, wipe the screen, put the cursor at the top-left and show when this refresh happened.
	if ($anvil->data->{switches}{watch})
	{
		system('clear');
		print $t->Tgoto("cm", 0, 0);
		
		my $updated = $anvil->Get->date_and_time();
		print "-=] Updated: $updated - Press '<ctrl> + <c>' to exit\n";
	}
	
	### TODO: Add support for checking/monitoring DR hosts
	# Only subnodes are supported for now.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	unless ($host_type eq "node")
	{
		print "This must be run on a subnode. Exiting.\n";
		$anvil->nice_exit({exit_code => 1});
	}
	
	### TODO: Make this work outside the cluster, for cases when servers are running outside the 
	###       pacemaker cluster stack.
	# A true return from parse_cib() is treated as "not a cluster member".
	my $problem = $anvil->Cluster->parse_cib();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	if ($problem)
	{
		print "This subnode is not in the cluster (failed to parse the CIB). Exiting.\n";
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Load the host and Anvil! data before the display functions run.
	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();
	
	# Servers first, then a blank line, then the subnodes.
	show_servers($anvil);
	print "\n";
	show_nodes($anvil);
	
	return(0);
}
# Placeholder for the server status display; nothing is printed yet.
# 
# Takes the Anvil::Tools object as the only argument (currently unused). Returns '0'.
sub show_servers
{
	my $anvil = shift;
	
	### TODO: Show the server states
	return 0;
}
# Prints a table showing, for each node found in the parsed CIB, its host status, Pacemaker status and 
# whether it is in maintenance mode. The output looks like this;
# 
#   +---------+---------------+--------------------+--------------------+
#   | Subnode |  Host State   |  Pacemaker State   |  Maintenance Mode  |
#   +---------+---------------+--------------------+--------------------+
#   | <node1> | <host status> | <pacemaker status> | <maintenance mode> |
#   | <node2> | <host status> | <pacemaker status> | <maintenance mode> |
#   +---------+---------------+--------------------+--------------------+
# 
# The planned server table (not yet implemented, see show_servers()) is;
# 
#   +-------------+----------------+------------------+-------------+------------------+--------------+
#   | server name | <server state> | <resource state> | <host node> | <preferred host> | <drbd fence> |
#   +-------------+----------------+------------------+-------------+------------------+--------------+
# 
# Each node's state is collected once, and that same data is used both to size the columns and to print the
# rows, so the widths always match what is shown.
# 
# Takes the Anvil::Tools object as the only argument. It reads 'cib::parsed::data::node' and 
# 'hosts::host_uuid' from '$anvil->data', and stores the translated strings under the 'say' hash. Returns '0'.
sub show_nodes
{
	my ($anvil) = @_;
	
	# Column headers. The maintenance mode header has its own key so that it is not overwritten by the 
	# maintenance mode state string below.
	$anvil->data->{'say'}{subnode}                 = $anvil->Words->string({key => "header_0117"});
	$anvil->data->{'say'}{host_status}             = $anvil->Words->string({key => "header_0118"});
	$anvil->data->{'say'}{pacemaker_status}        = $anvil->Words->string({key => "header_0119"});
	$anvil->data->{'say'}{maintenance_mode_header} = $anvil->Words->string({key => "header_0120"});
	
	### Strings
	# Host states
	$anvil->data->{'say'}{unknown}     = $anvil->Words->string({key => "unit_0004"});
	$anvil->data->{'say'}{online}      = $anvil->Words->string({key => "striker_0308"});
	$anvil->data->{'say'}{powered_off} = $anvil->Words->string({key => "striker_0307"});
	$anvil->data->{'say'}{stopping}    = $anvil->Words->string({key => "striker_0309"});
	$anvil->data->{'say'}{booting}     = $anvil->Words->string({key => "striker_0310"});
	
	# Cluster states (online from above)
	$anvil->data->{'say'}{offline}       = $anvil->Words->string({key => "striker_0311"});
	$anvil->data->{'say'}{transitioning} = $anvil->Words->string({key => "striker_0312"});
	
	# Maintenance mode.
	$anvil->data->{'say'}{maintenance_mode} = $anvil->Words->string({key => "striker_0313"});
	$anvil->data->{'say'}{normal_operation} = $anvil->Words->string({key => "striker_0314"});
	
	# Each column starts out as wide as its header, and grows if a value is wider.
	my $longest_node_name        = length($anvil->data->{'say'}{subnode});
	my $longest_host_status      = length($anvil->data->{'say'}{host_status});
	my $longest_pacemaker_status = length($anvil->data->{'say'}{pacemaker_status});
	my $longest_maintenance_mode = length($anvil->data->{'say'}{maintenance_mode_header});
	
	# Maps the 'host_status' values to the strings to display. Anything else is shown as unknown.
	my $host_status_strings = {
		'online'      => $anvil->data->{'say'}{online},
		'powered off' => $anvil->data->{'say'}{powered_off},
		'stopping'    => $anvil->data->{'say'}{stopping},
		'booting'     => $anvil->data->{'say'}{booting},
	};
	
	# Collect the state of each node, tracking the column widths as we go.
	my $rows = [];
	foreach my $node_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{node}})
	{
		# The host UUID lookup can fail, in which case we have no host status to show.
		my $host_uuid = $anvil->Database->get_host_uuid_from_string({string => $node_name});
		   $host_uuid = "" if not defined $host_uuid;
		my $host_status = "";
		if (($host_uuid) && (exists $anvil->data->{hosts}{host_uuid}{$host_uuid}) && (defined $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_status}))
		{
			$host_status = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_status};
		}
		my $node_state       = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state};
		my $maintenance_mode = $node_state->{'maintenance-mode'};
		my $in_ccm           = $node_state->{in_ccm};
		my $crmd             = $node_state->{crmd};
		my $join             = $node_state->{'join'};
		my $ready            = $node_state->{ready};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:node_name'        => $node_name,
			's2:host_uuid'        => $host_uuid,
			's3:host_status'      => $host_status,
			's4:maintenance_mode' => $maintenance_mode,
			's5:in_ccm'           => $in_ccm,
			's6:crmd'             => $crmd,
			's7:join'             => $join,
			's8:ready'            => $ready,
		}});
		
		# Convert the host state to a string.
		my $say_host_status = exists $host_status_strings->{$host_status} ? $host_status_strings->{$host_status} : $anvil->data->{'say'}{unknown};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_host_status => $say_host_status }});
		
		# Convert the Pacemaker state. A node that is not ready, but has any of the other membership
		# flags set, is shown as transitioning.
		my $say_pacemaker_status = $anvil->data->{'say'}{offline};
		if ($ready)
		{
			$say_pacemaker_status = $anvil->data->{'say'}{online};
		}
		elsif (($in_ccm) or ($crmd) or ($join))
		{
			$say_pacemaker_status = $anvil->data->{'say'}{transitioning};
		}
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_pacemaker_status => $say_pacemaker_status }});
		
		# Maintenance mode is shown if either the CIB or the Anvil! database has it set. If the host
		# UUID is not known, the database check is skipped, as System->maintenance_mode() would 
		# otherwise fall back to checking the local host.
		my $anvil_maintenance_mode = $host_uuid ? $anvil->System->maintenance_mode({host_uuid => $host_uuid}) : 0;
		my $say_maintenance_mode   = (($maintenance_mode) or ($anvil_maintenance_mode)) ? $anvil->data->{'say'}{maintenance_mode} : $anvil->data->{'say'}{normal_operation};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			anvil_maintenance_mode => $anvil_maintenance_mode,
			say_maintenance_mode   => $say_maintenance_mode,
		}});
		
		# Update the lengths, if needed
		if (length($node_name) > $longest_node_name)
		{
			$longest_node_name = length($node_name);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { longest_node_name => $longest_node_name }});
		}
		if (length($say_host_status) > $longest_host_status)
		{
			$longest_host_status = length($say_host_status);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { longest_host_status => $longest_host_status }});
		}
		if (length($say_pacemaker_status) > $longest_pacemaker_status)
		{
			$longest_pacemaker_status = length($say_pacemaker_status);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { longest_pacemaker_status => $longest_pacemaker_status }});
		}
		if (length($say_maintenance_mode) > $longest_maintenance_mode)
		{
			$longest_maintenance_mode = length($say_maintenance_mode);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { longest_maintenance_mode => $longest_maintenance_mode }});
		}
		
		push @{$rows}, {
			node_name        => $node_name,
			host_status      => $say_host_status,
			pacemaker_status => $say_pacemaker_status,
			maintenance_mode => $say_maintenance_mode,
		};
	}
	
	# Build the divider and the header row now that the column widths are known.
	my $divider_line = "+-".("-" x $longest_node_name)."-+-".("-" x $longest_host_status)."-+-".("-" x $longest_pacemaker_status)."-+-".("-" x $longest_maintenance_mode)."-+\n";
	
	my $say_subnode_header          = $anvil->Words->center_text({string => $anvil->data->{'say'}{subnode},                 width => $longest_node_name});
	my $say_host_status_header      = $anvil->Words->center_text({string => $anvil->data->{'say'}{host_status},             width => $longest_host_status});
	my $say_pacemaker_status_header = $anvil->Words->center_text({string => $anvil->data->{'say'}{pacemaker_status},        width => $longest_pacemaker_status});
	my $say_maintenance_mode_header = $anvil->Words->center_text({string => $anvil->data->{'say'}{maintenance_mode_header}, width => $longest_maintenance_mode});
	
	print $divider_line;
	print "| ".$say_subnode_header." | ".$say_host_status_header." | ".$say_pacemaker_status_header." | ".$say_maintenance_mode_header." |\n";
	print $divider_line;
	
	# Now show the subnode states, left-justified and padded to the column widths.
	foreach my $row (@{$rows})
	{
		printf("| %-*s | %-*s | %-*s | %-*s |\n", 
			$longest_node_name,        $row->{node_name}, 
			$longest_host_status,      $row->{host_status}, 
			$longest_pacemaker_status, $row->{pacemaker_status}, 
			$longest_maintenance_mode, $row->{maintenance_mode}, 
		);
	}
	print $divider_line;
	
	return(0);
}
Loading…
Cancel
Save