From 96bc1f0b78b909ebf50cae6a43e8a90f93a9c38b Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 21 Dec 2020 16:00:35 -0500 Subject: [PATCH] * Created Convert->fence_ipmilan_to_ipmitool() that takes a 'fence_ipmilan' call and converts it into a direct 'ipmitool' call. * Created Database->get_power() that loads data from the special 'power' table. * Fixed a bug in calls to Network->ping() where some weren't formatted properly for receiving two string variables. * Updated Database->get_anvils() to record the machine types when recording host information. * Updated Database->get_hosts_info() to also load the 'host_ipmi' column. * Updated Database->get_upses() to store the link to the 'power' -> 'power_uuid', when available. * Created ScanCore->call_scan_agents() that does the work of actually calling scan agents, moving the logic out from the scancore daemon. * Created ScanCore->check_power() that takes a host and the anvil it is in and returns if it's on batteries or not. If it is, the time on batteries and estimate hold-up time is returned. If not, the highest charge percentage is returned. * Created ScanCore->post_scan_analysis() that is a wrapper for calling the new ->post_scan_analysis_dr(), ->post_scan_analysis_node() and ->post_scan_analysis_striker(). Of which, _dr and _node are still empty, but _striker is complete. ** ->post_scan_analysis_striker() is complete. It now boots a node after a power loss if the UPSes powering it are OK (at least one has mains power, and the main-powered UPS(es) have reached the minimum charge percentage). If it's thermal, IPMI is called and so long as at least one thermal sensor is found and it/they are all OK, it is booted. For now, M2's thermal reboot delay logic hasn't been replicated, as it added a lot of complexity and didn't prove practically useful. * Created System->collect_ipmi_data() and moved 'scan_ipmitool's ipmitool call and parse into that method. 
This was done to allow ScanCore->post_scan_analysis_striker() to also call IPMI on a remote machine during thermal down events without reimplementing the logic. * Updated scan-ipmitool to only record temperature data for data collected locally. Also renamed 'machine' variables and hash keys to 'host_name' to clarify what is being stored. * Updated scancore to clear the 'system::stop_reason' variable. * Added missing packages to striker-manage-install-target. Signed-off-by: Digimer --- Anvil/Tools/Convert.pm | 95 ++ Anvil/Tools/Database.pm | 184 +++- Anvil/Tools/Network.pm | 18 +- Anvil/Tools/ScanCore.pm | 827 +++++++++++++++++- Anvil/Tools/System.pm | 304 +++++++ scancore-agents/scan-apc-pdu/scan-apc-pdu | 7 +- scancore-agents/scan-apc-ups/scan-apc-ups | 9 +- scancore-agents/scan-ipmitool/scan-ipmitool | 763 ++++++---------- .../scan-ipmitool/scan-ipmitool.xml | 20 +- share/words.xml | 20 + tools/scancore | 162 +--- tools/striker-manage-install-target | 56 +- tools/test.pl | 39 +- 13 files changed, 1821 insertions(+), 683 deletions(-) diff --git a/Anvil/Tools/Convert.pm b/Anvil/Tools/Convert.pm index ed00d1ae..c59464cf 100644 --- a/Anvil/Tools/Convert.pm +++ b/Anvil/Tools/Convert.pm @@ -17,6 +17,7 @@ my $THIS_FILE = "Convert.pm"; # celsius_to_fahrenheit # cidr # fahrenheit_to_celsius +# fence_ipmilan_to_ipmitool # format_mmddyy_to_yymmdd # host_name_to_ip # human_readable_to_bytes @@ -726,6 +727,100 @@ sub fahrenheit_to_celsius } +=head2 fence_ipmilan_to_ipmitool + +This takes a C<< fence_ipmilan >> command and converts it into an C<< ipmitool >> command. The C<< action >> is ignored, as this method is meant to be called when one machine wants to check the health of another machine. + +On error, C<< !!error!! >> is returned. Otherwise, a shell call and the user password will be returned as two values. + +Parameters; + +=head3 fence_ipmilan_command (required) + +This is the C<< fence_ipmilan >> command to be translated. 
+
+=cut
+sub fence_ipmilan_to_ipmitool
+{
+	# Converts a 'fence_ipmilan' command string into an 'ipmitool' call. Two values are returned; the
+	# 'ipmitool' shell call (path plus connection switches, no password and no sub-command) and the IPMI
+	# password. If the command is missing or isn't a 'fence_ipmilan' call, both values are '!!error!!'.
+	my $self      = shift;
+	my $parameter = shift;
+	my $anvil     = $self->parent;
+	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Convert->fence_ipmilan_to_ipmitool()" }});
+
+	my $fence_ipmilan_command = defined $parameter->{fence_ipmilan_command} ? $parameter->{fence_ipmilan_command} : "";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		fence_ipmilan_command => $fence_ipmilan_command,
+	}});
+
+	if (not $fence_ipmilan_command)
+	{
+		# Name the parameter that is missing (this used to report the method name by mistake).
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Convert->fence_ipmilan_to_ipmitool()", parameter => "fence_ipmilan_command" }});
+		return("!!error!!", "!!error!!");
+	}
+	elsif ($fence_ipmilan_command !~ /fence_ipmilan /)
+	{
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0168", variables => { command => $fence_ipmilan_command }});
+		return("!!error!!", "!!error!!");
+	}
+
+	# NOTE: Switch values are captured with '(\S+)' instead of '(.*?) ' so that a switch that is the last
+	#       thing in the command (so no trailing space) is still translated.
+	my $ipmitool_command = $anvil->data->{path}{exe}{ipmitool};
+	my $ipmi_password    = "";
+	if (($fence_ipmilan_command =~ /-A (\S+)/) or ($fence_ipmilan_command =~ /-auth (\S+)/))
+	{
+		# IPMI Lan Auth type (md5, password, or none)
+		$ipmitool_command .= " -A ".$1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ipmitool_command => $ipmitool_command }});
+	}
+	if (($fence_ipmilan_command =~ /-a (\S+)/) or ($fence_ipmilan_command =~ /-ip (\S+)/))
+	{
+		# IPMI Lan IP to talk to
+		$ipmitool_command .= " -H ".$1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ipmitool_command => $ipmitool_command }});
+	}
+	if (($fence_ipmilan_command =~ /-P(?:\s|$)/) or ($fence_ipmilan_command =~ /-lanplus(?:\s|$)/))
+	{
+		# Use Lanplus to improve security of connection
+		$ipmitool_command .= " -I lanplus";
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ipmitool_command => $ipmitool_command }});
+	}
+	if (($fence_ipmilan_command =~ /-l (\S+)/) or ($fence_ipmilan_command =~ /-username (\S+)/))
+	{
+		# Username/Login (if required) to control power on IPMI device
+		$ipmitool_command .= " -U ".$1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ipmitool_command => $ipmitool_command }});
+	}
+	if (($fence_ipmilan_command =~ /-C (\S+)/) or ($fence_ipmilan_command =~ /-cipher (\S+)/))
+	{
+		# Ciphersuite to use (same as ipmitool -C parameter)
+		$ipmitool_command .= " -C ".$1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ipmitool_command => $ipmitool_command }});
+	}
+	if (($fence_ipmilan_command =~ /-L (\S+)/) or ($fence_ipmilan_command =~ /-privlvl (\S+)/))
+	{
+		# Privilege level on IPMI device
+		$ipmitool_command .= " -L ".$1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { ipmitool_command => $ipmitool_command }});
+	}
+	# The last alternative catches a short-form '-p <password>' that ends the command, mirroring the existing
+	# end-of-command handling for the long form.
+	if (($fence_ipmilan_command =~ /-p (.*?) -/) or ($fence_ipmilan_command =~ /-password (.*?) -/) or ($fence_ipmilan_command =~ /-password '(.*?)'/) or ($fence_ipmilan_command =~ /-password (.*)$/) or ($fence_ipmilan_command =~ /-p (.*)$/))
+	{
+		# Password (if required) to control power on IPMI device
+		$ipmi_password = $1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, secure => 1, level => $debug, list => { ">> ipmi_password" => $ipmi_password }});
+
+		# Strip the wrapping single-quotes, if any, and un-escape any single-quotes inside the password.
+		$ipmi_password =~ s/^'(.*?)'$/$1/;
+		$ipmi_password =~ s/\\'/'/g;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, secure => 1, level => $debug, list => { "<< ipmi_password" => $ipmi_password }});
+	}
+
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		ipmitool_command => $ipmitool_command,
+		ipmi_password    => $anvil->Log->is_secure($ipmi_password),
+	}});
+	return($ipmitool_command, $ipmi_password);
+}
+
+
 =head2 format_mmddyy_to_yymmdd
 
 This converts a C<< mm/dd/yy >> or C<< mm/dd/yyyy >> string into the more sensible yy/mm/dd or yyyy/mm/dd string.
diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm
index ab8e5263..d5da2fe9 100644
--- a/Anvil/Tools/Database.pm
+++ b/Anvil/Tools/Database.pm
@@ -39,6 +39,7 @@ my $THIS_FILE = "Database.pm";
 # get_servers
 # get_ssh_keys
 # get_tables_from_schema
+# get_power
 # get_upses
 # initialize
 # insert_or_update_anvils
@@ -1234,17 +1235,20 @@ sub connect
 		if ((not $no_ping) && ($anvil->data->{database}{$uuid}{ping}))
 		{
 			# Can I ping?
- my ($pinged) = $anvil->Network->ping({ + my ($pinged, $average_time) = $anvil->Network->ping({ debug => $debug, ping => $host, count => 1, timeout => $anvil->data->{database}{$uuid}{ping}, }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + pinged => $pinged, + average_time => $average_time, + }}); my $ping_time = tv_interval ($start_time, [gettimeofday]); #print "[".$ping_time."] - Pinged: [$host:$port:$name:$user]\n"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pinged => $pinged }}); if (not $pinged) { # Didn't ping and 'database::::ping' not set. Record this @@ -2015,27 +2019,33 @@ WHERE { $anvil->data->{anvils}{host_uuid}{$anvil_node1_host_uuid}{anvil_name} = $anvil_name; $anvil->data->{anvils}{host_uuid}{$anvil_node1_host_uuid}{anvil_uuid} = $anvil_uuid; + $anvil->data->{anvils}{host_uuid}{$anvil_node1_host_uuid}{role} = "node1"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "anvils::host_uuid::${anvil_node1_host_uuid}::anvil_name" => $anvil->data->{anvils}{host_uuid}{$anvil_node1_host_uuid}{anvil_name}, "anvils::host_uuid::${anvil_node1_host_uuid}::anvil_uuid" => $anvil->data->{anvils}{host_uuid}{$anvil_node1_host_uuid}{anvil_uuid}, + "anvils::host_uuid::${anvil_node1_host_uuid}::role" => $anvil->data->{anvils}{host_uuid}{$anvil_node1_host_uuid}{role}, }}); } if ($anvil_node2_host_uuid) { $anvil->data->{anvils}{host_uuid}{$anvil_node2_host_uuid}{anvil_name} = $anvil_name; $anvil->data->{anvils}{host_uuid}{$anvil_node2_host_uuid}{anvil_uuid} = $anvil_uuid; + $anvil->data->{anvils}{host_uuid}{$anvil_node2_host_uuid}{role} = "node2"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "anvils::host_uuid::${anvil_node2_host_uuid}::anvil_name" => $anvil->data->{anvils}{host_uuid}{$anvil_node2_host_uuid}{anvil_name}, "anvils::host_uuid::${anvil_node2_host_uuid}::anvil_uuid" => 
$anvil->data->{anvils}{host_uuid}{$anvil_node2_host_uuid}{anvil_uuid}, + "anvils::host_uuid::${anvil_node2_host_uuid}::role" => $anvil->data->{anvils}{host_uuid}{$anvil_node2_host_uuid}{role}, }}); } if ($anvil_dr1_host_uuid) { $anvil->data->{anvils}{host_uuid}{$anvil_dr1_host_uuid}{anvil_name} = $anvil_name; $anvil->data->{anvils}{host_uuid}{$anvil_dr1_host_uuid}{anvil_uuid} = $anvil_uuid; + $anvil->data->{anvils}{host_uuid}{$anvil_dr1_host_uuid}{role} = "dr1"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "anvils::host_uuid::${anvil_dr1_host_uuid}::anvil_name" => $anvil->data->{anvils}{host_uuid}{$anvil_dr1_host_uuid}{anvil_name}, "anvils::host_uuid::${anvil_dr1_host_uuid}::anvil_uuid" => $anvil->data->{anvils}{host_uuid}{$anvil_dr1_host_uuid}{anvil_uuid}, + "anvils::host_uuid::${anvil_dr1_host_uuid}::role" => $anvil->data->{anvils}{host_uuid}{$anvil_dr1_host_uuid}{role}, }}); } } @@ -2368,12 +2378,16 @@ sub get_hosts_info my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->get_hosts_info()" }}); + # Load anvil data so we can find passwords. 
+ $anvil->Database->get_anvils({debug => $debug}); + my $query = " SELECT host_uuid, host_name, host_type, - host_key + host_key, + host_ipmi FROM hosts ;"; @@ -2391,21 +2405,52 @@ FROM my $host_name = $row->[1]; my $host_type = $row->[2]; my $host_key = $row->[3]; + my $host_ipmi = $row->[4]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid, host_name => $host_name, host_type => $host_type, host_key => $host_key, + host_ipmi => $anvil->Log->is_secure($host_ipmi), }}); $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_name} = $host_name; $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_type} = $host_type; $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_key} = $host_key; + $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_ipmi} = $host_ipmi; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "machine::host_uuid::${host_uuid}::hosts::host_name" => $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_name}, "machine::host_uuid::${host_uuid}::hosts::host_type" => $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_type}, "machine::host_uuid::${host_uuid}::hosts::host_key" => $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_key}, + "machine::host_uuid::${host_uuid}::hosts::host_ipmi" => $anvil->Log->is_secure($anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_ipmi}), }}); + # If this is an Anvil! member, pull it's IP. 
+ $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{name} = ""; + $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{uuid} = ""; + $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{role} = ""; + $anvil->data->{machine}{host_uuid}{$host_uuid}{password} = ""; + if (exists $anvil->data->{anvils}{host_uuid}{$host_uuid}) + { + my $anvil_uuid = $anvil->data->{anvils}{host_uuid}{$host_uuid}{anvil_uuid}; + $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{name} = $anvil->data->{anvils}{host_uuid}{$host_uuid}{anvil_name}; + $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{uuid} = $anvil_uuid; + $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{role} = $anvil->data->{anvils}{host_uuid}{$host_uuid}{role}; + $anvil->data->{machine}{host_uuid}{$host_uuid}{password} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "machine::host_uuid::${host_uuid}::anvil::name" => $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{name}, + "machine::host_uuid::${host_uuid}::anvil::uuid" => $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{uuid}, + "machine::host_uuid::${host_uuid}::anvil::role" => $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{role}, + "machine::host_uuid::${host_uuid}::password" => $anvil->Log->is_secure($anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{password}), + }}); + } + elsif (exists $anvil->data->{database}{$host_uuid}) + { + $anvil->data->{machine}{host_uuid}{$host_uuid}{password} = $anvil->data->{database}{$host_uuid}{password}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "machine::host_uuid::${host_uuid}::password" => $anvil->Log->is_secure($anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{password}), + }}); + } + # Read in the variables. my $query = " SELECT @@ -2436,8 +2481,9 @@ AND }}); } - # Read in the IP addresses and network information. 
-
+		# Read in the IP addresses and network information. Data is loaded under
+		# 'network::host_uuid::x'.
+		$anvil->Network->load_interfces({debug => $debug, host_uuid => $host_uuid});
 	}
 
 	return(0);
@@ -3840,6 +3886,111 @@ sub get_tables_from_schema
 }
 
 
+=head2 get_power
+
+This loads the special C<< power >> table, which complements the C<< upses >> table. This helps ScanCore determine when nodes need to shut down or can be powered back up during power events.
+
+The data is stored in the C<< anvil::data >> hash at:
+
+* power::power_uuid::<power_uuid>::power_ups_uuid
+* power::power_uuid::<power_uuid>::power_on_battery
+* power::power_uuid::<power_uuid>::power_seconds_left
+* power::power_uuid::<power_uuid>::power_charge_percentage
+* power::power_uuid::<power_uuid>::modified_date_unix
+
+And, to allow for lookup by the UUID of the UPS the record belongs to;
+
+* power::power_ups_uuid::<power_ups_uuid>::power_uuid
+* power::power_ups_uuid::<power_ups_uuid>::power_on_battery
+* power::power_ups_uuid::<power_ups_uuid>::power_seconds_left
+* power::power_ups_uuid::<power_ups_uuid>::power_charge_percentage
+* power::power_ups_uuid::<power_ups_uuid>::modified_date_unix
+
+B<< Note >>: The C<< modified_date >> is cast as a unix time stamp.
+
+If the hash was already populated, it is cleared before repopulating to ensure no stray data remains.
+
+This method takes no parameters. It always returns C<< 0 >>.
+
+=cut
+sub get_power
+{
+	my $self      = shift;
+	my $parameter = shift;
+	my $anvil     = $self->parent;
+	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->get_power()" }});
+
+	# Drop anything loaded by an earlier call so that stale records can't survive a reload.
+	if (exists $anvil->data->{power})
+	{
+		delete $anvil->data->{power};
+	}
+
+	# The modification time is converted to (rounded) unix time by the database.
+	my $query = "
+SELECT
+    power_uuid,
+    power_ups_uuid,
+    power_on_battery,
+    power_seconds_left,
+    power_charge_percentage,
+    round(extract(epoch from modified_date))
+FROM
+    power
+;";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
+	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
+	my $count   = @{$results};
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		results => $results,
+		count   => $count,
+	}});
+	foreach my $row (@{$results})
+	{
+		# Columns are read by position, in the same order as the SELECT above.
+		my $power_uuid              = $row->[0];
+		my $power_ups_uuid          = $row->[1];
+		my $power_on_battery        = $row->[2];
+		my $power_seconds_left      = $row->[3];
+		my $power_charge_percentage = $row->[4];
+		my $modified_date_unix      = $row->[5];
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+			power_uuid              => $power_uuid,
+			power_ups_uuid          => $power_ups_uuid,
+			power_on_battery        => $power_on_battery,
+			power_seconds_left      => $power_seconds_left,
+			power_charge_percentage => $power_charge_percentage,
+			modified_date_unix      => $modified_date_unix,
+		}});
+
+		# Record the data in the hash, too.
+		$anvil->data->{power}{power_uuid}{$power_uuid}{power_ups_uuid}          = $power_ups_uuid;
+		$anvil->data->{power}{power_uuid}{$power_uuid}{power_on_battery}        = $power_on_battery;
+		$anvil->data->{power}{power_uuid}{$power_uuid}{power_seconds_left}      = $power_seconds_left;
+		$anvil->data->{power}{power_uuid}{$power_uuid}{power_charge_percentage} = $power_charge_percentage;
+		$anvil->data->{power}{power_uuid}{$power_uuid}{modified_date_unix}      = $modified_date_unix;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+			"power::power_uuid::${power_uuid}::power_ups_uuid"          => $anvil->data->{power}{power_uuid}{$power_uuid}{power_ups_uuid},
+			"power::power_uuid::${power_uuid}::power_on_battery"        => $anvil->data->{power}{power_uuid}{$power_uuid}{power_on_battery},
+			"power::power_uuid::${power_uuid}::power_seconds_left"      => $anvil->data->{power}{power_uuid}{$power_uuid}{power_seconds_left},
+			"power::power_uuid::${power_uuid}::power_charge_percentage" => $anvil->data->{power}{power_uuid}{$power_uuid}{power_charge_percentage},
+			"power::power_uuid::${power_uuid}::modified_date_unix"      => $anvil->data->{power}{power_uuid}{$power_uuid}{modified_date_unix},
+		}});
+
+		# Store the same record a second time, keyed by the UPS's UUID, so callers holding a 'ups_uuid' can
+		# find the matching 'power_uuid' (and state) directly.
+		$anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_uuid}              = $power_uuid;
+		$anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_on_battery}        = $power_on_battery;
+		$anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_seconds_left}      = $power_seconds_left;
+		$anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_charge_percentage} = $power_charge_percentage;
+		$anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{modified_date_unix}      = $modified_date_unix;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+			"power::power_ups_uuid::${power_ups_uuid}::power_uuid"              => $anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_uuid},
+			"power::power_ups_uuid::${power_ups_uuid}::power_on_battery"        => $anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_on_battery},
+			"power::power_ups_uuid::${power_ups_uuid}::power_seconds_left"      => $anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_seconds_left},
+			"power::power_ups_uuid::${power_ups_uuid}::power_charge_percentage" => $anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{power_charge_percentage},
+			"power::power_ups_uuid::${power_ups_uuid}::modified_date_unix"      => $anvil->data->{power}{power_ups_uuid}{$power_ups_uuid}{modified_date_unix},
+		}});
+	}
+
+	return(0);
+}
+
+
 =head2 get_upses
 
 This loads the known UPSes (uninterruptible power supplies) into the C<< anvil::data >> hash at:
@@ -3848,6 +3999,7 @@ This loads the known UPSes (uninterruptible power supplies) into the C<< anvil::
 * upses::ups_uuid::::ups_agent
 * upses::ups_uuid::::ups_ip_address
 * upses::ups_uuid::::modified_date
+* upses::ups_uuid::::power_uuid
 
 And, to allow for lookup by name;
 
@@ -3855,11 +4007,14 @@ And, to allow for lookup by name;
 * upses::ups_name::::ups_agent
 * upses::ups_name::::ups_ip_address
 * upses::ups_name::::modified_date
+* upses::ups_name::::power_uuid
 
 If the hash was already populated, it is cleared before repopulating to ensure no stray data remains.
 
 B<>: Deleted devices (ones where C<< ups_ip_address >> is set to C<< DELETED >>) are ignored. See the C<< include_deleted >> parameter to include them.
 
+B<< Note>>: If a scan agent has scanned this UPS, it's power state information will be stored in the C<< power >> table. If a matching record is found, the C<< power_uuid >> will be stored in the C<< ...::power_uuid >> hash references. For this linking to work, this method will call C<< Database->get_power >>.
+
 Parameters;
 
 =head3 include_deleted (Optional, default 0)
@@ -3885,6 +4040,9 @@ sub get_upses
 		delete $anvil->data->{upses};
 	}
 
+	# Load the power data.
+ $anvil->Database->get_power({debug => $debug}); + my $query = " SELECT ups_uuid, @@ -3929,6 +4087,7 @@ WHERE $anvil->data->{upses}{ups_uuid}{$ups_uuid}{ups_agent} = $ups_agent; $anvil->data->{upses}{ups_uuid}{$ups_uuid}{ups_ip_address} = $ups_ip_address; $anvil->data->{upses}{ups_uuid}{$ups_uuid}{modified_date} = $modified_date; + $anvil->data->{upses}{ups_uuid}{$ups_uuid}{power_uuid} = ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "upses::ups_uuid::${ups_uuid}::ups_name" => $anvil->data->{upses}{ups_uuid}{$ups_uuid}{ups_name}, "upses::ups_uuid::${ups_uuid}::ups_agent" => $anvil->data->{upses}{ups_uuid}{$ups_uuid}{ups_agent}, @@ -3940,12 +4099,27 @@ WHERE $anvil->data->{upses}{ups_name}{$ups_name}{ups_agent} = $ups_agent; $anvil->data->{upses}{ups_name}{$ups_name}{ups_ip_address} = $ups_ip_address; $anvil->data->{upses}{ups_name}{$ups_name}{modified_date} = $modified_date; + $anvil->data->{upses}{ups_name}{$ups_name}{power_uuid} = ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "upses::ups_name::${ups_name}::ups_uuid" => $anvil->data->{upses}{ups_name}{$ups_name}{ups_uuid}, "upses::ups_name::${ups_name}::ups_agent" => $anvil->data->{upses}{ups_name}{$ups_name}{ups_agent}, "upses::ups_name::${ups_name}::ups_ip_address" => $anvil->data->{upses}{ups_name}{$ups_name}{ups_ip_address}, "upses::ups_name::${ups_name}::modified_date" => $anvil->data->{upses}{ups_name}{$ups_name}{modified_date}, }}); + + # Collect power information from 'power'. 
+ if (exists $anvil->data->{power}{power_ups_uuid}{$ups_uuid}) + { + my $power_uuid = $anvil->data->{power}{power_ups_uuid}{$ups_uuid}{power_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { power_uuid => $power_uuid }}); + + $anvil->data->{upses}{ups_uuid}{$ups_uuid}{power_uuid} = $power_uuid; + $anvil->data->{upses}{ups_name}{$ups_name}{power_uuid} = $power_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "upses::ups_uuid::${ups_uuid}::power_ups_uuid" => $anvil->data->{upses}{ups_uuid}{$ups_uuid}{power_ups_uuid}, + "upses::ups_name::${ups_name}::power_ups_uuid" => $anvil->data->{upses}{ups_name}{$ups_name}{power_ups_uuid}, + }}); + } } return(0); diff --git a/Anvil/Tools/Network.pm b/Anvil/Tools/Network.pm index 51df6bb3..856a5ba4 100755 --- a/Anvil/Tools/Network.pm +++ b/Anvil/Tools/Network.pm @@ -308,7 +308,7 @@ sub check_internet next; } - my $pinged = $anvil->Network->ping({ + my ($pinged, $average_time) = $anvil->Network->ping({ debug => $debug, target => $target, port => $port, @@ -317,7 +317,10 @@ sub check_internet ping => $domain, count => 3, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pinged => $pinged }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + pinged => $pinged, + average_time => $average_time, + }}); if ($pinged) { $access = 1; @@ -2058,13 +2061,13 @@ This method will attempt to ping a target, by host name or IP, and returns C<< 1 Example; # Test access to the internet. Allow for three attempts to account for network jitter. - my $pinged = $anvil->Network->ping({ + my ($pinged, $average_time) = $anvil->Network->ping({ ping => "google.ca", count => 3, }); # Test 9000-byte jumbo-frame access to a target over the BCN. 
- my $jumbo_to_peer = $anvil->Network->ping({ + my ($jumbo_to_peer, $average_time) = $anvil->Network->ping({ ping => "an-a01n02.bcn", count => 1, payload => 9000, @@ -2072,7 +2075,7 @@ Example; }); # Check to see if an Anvil! node has internet access - my $pinged = $anvil->Network->ping({ + my ($pinged, $average_time) = $anvil->Network->ping({ target => "an-a01n01.alteeve.com", port => 22, password => "super secret", @@ -2181,7 +2184,7 @@ sub ping { $shell_call = $anvil->data->{path}{exe}{timeout}." $timeout "; } - $shell_call .= $anvil->data->{path}{exe}{'ping'}." -W 1 -n $ping -c 1"; + $shell_call .= $anvil->data->{path}{exe}{'ping'}." -W 1 -n ".$ping." -c 1"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); if (not $fragment) { @@ -2199,8 +2202,8 @@ sub ping my $average_ping_time = 0; foreach my $try (1..$count) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { count => $count, try => $try }}); last if $pinged; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { count => $count, try => $try }}); my $output = ""; my $error = ""; @@ -2249,6 +2252,7 @@ sub ping # Contact! 
$pinged = 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pinged => $pinged }}); + last; } else { diff --git a/Anvil/Tools/ScanCore.pm b/Anvil/Tools/ScanCore.pm index cc8a4d00..8eeb8611 100755 --- a/Anvil/Tools/ScanCore.pm +++ b/Anvil/Tools/ScanCore.pm @@ -16,6 +16,13 @@ my $THIS_FILE = "ScanCore.pm"; ### Methods; # agent_startup +# call_scan_agents +# check_power +# post_scan_analysis +# post_scan_analysis_dr +# post_scan_analysis_node +# post_scan_analysis_striker +# _scan_directory =pod @@ -78,17 +85,6 @@ sub parent ############################################################################################################# -# =head3 -# -# Private Functions; -# -# =cut - -############################################################################################################# -# Private functions # -############################################################################################################# - - =head2 agent_startup This method handles connecting to the databases, loading the agent's schema, resync'ing database tables if needed and reading in the words files. @@ -198,4 +194,813 @@ sub agent_startup } +=head2 call_scan_agents + +This method calls all scan agents found on this system. It looks under the C<< path::directories::scan_agents >> directory (and subdirectories) for scan agents. + +This method takes no parameters. + +=cut +sub call_scan_agents +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->call_scan_agents()" }}); + + # Get the current list of scan agents on this system. 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "path::directories::scan_agents" => $anvil->data->{path}{directories}{scan_agents}, + }}); + $anvil->ScanCore->_scan_directory({directory => $anvil->data->{path}{directories}{scan_agents}}); + + # Now loop through the agents I found and call them. + my $timeout = 30; + if ((exists $anvil->data->{scancore}{timing}{agent_runtime}) && ($anvil->data->{scancore}{timing}{agent_runtime} =~ /^\d+$/)) + { + $timeout = $anvil->data->{scancore}{timing}{agent_runtime}; + } + foreach my $agent_name (sort {$a cmp $b} keys %{$anvil->data->{scancore}{agent}}) + { + my $agent_path = $anvil->data->{scancore}{agent}{$agent_name}; + my $agent_words = $agent_path.".xml"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + agent_name => $agent_name, + agent_path => $agent_path, + agent_words => $agent_words, + }}); + + if ((-e $agent_words) && (-r $agent_words)) + { + # Read the words file so that we can generate alerts later. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0251", variables => { + agent_name => $agent_name, + file => $agent_words, + }}); + $anvil->Words->read({file => $agent_words}); + } + + # Set the timeout. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + agent_name => $agent_name, + "scancore::${agent_name}::timeout" => $anvil->data->{scancore}{$agent_name}{timeout}, + }}); + + # Now call the agent. 
+ my $start_time = time; + if (($anvil->data->{scancore}{$agent_name}{timeout}) && ($anvil->data->{scancore}{$agent_name}{timeout} =~ /^\d+$/)) + { + $timeout = $anvil->data->{scancore}{$agent_name}{timeout}; + } + my $shell_call = $agent_path; + if ($anvil->data->{sys}{'log'}{level}) + { + $shell_call .= " ".$anvil->data->{sys}{'log'}{level}; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + + # Tell the user this agent is about to run... + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => $debug, key => "log_0252", variables => { + agent_name => $agent_name, + timeout => $timeout, + }}); + my ($output, $return_code) = $anvil->System->call({timeout => $timeout, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { output => $output, return_code => $return_code }}); + foreach my $line (split/\n/, $output) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }}); + } + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => $debug, key => "log_0557", variables => { + agent_name => $agent_name, + runtime => (time - $start_time), + return_code => $return_code, + }}); + + # If the return code is '124', timeout popped. + if ($return_code eq "124") + { + ### TODO: Check if this alert was set so it only goes out once. + # Register an alert... + $anvil->Alert->register({set_by => $THIS_FILE, alert_level => "notice", message => "message_0180,!!agent_name!".$agent_name."!!,!!timeout!".$timeout."!!"}); + } + } + + return(0); +} + + +=head2 check_power + +This method checks the health of the UPSes powering a node. + +The power health, the shortest "time on batteries", the highest charge percentage and etimated hold-up time are returned. + +Power health values; +* '!!error!!' - There was a missing input variable. 
+* 0 - No UPSes found for the host +* 1 - One or more UPSes found and at least one has input power from mains. +* 2 - One or more UPSes found, all are running on battery. + +If the health is C<< 0 >>, all other values will also be C<< 0 >>. + +If the health is C<< 1 >>, the "time on batteries" and "estimated hold up time" will be C<< 0 >> and the highest charge percentage will be set. + +If the health is C<< 2 >>, the "time on batteries" will be the number of seconds since the last UPS to lose power was found to be running on batteries. The estimated hold up time of the strongest UPS is also returned in seconds. + +If no UPSes were found, health of '0' is returned (unknown). If both/all UPSes are running on batteries, a health of '2' is returned. + +Parameters; + +=head3 anvil_uuid (required) + +This is the Anvil! UUID that the machine belongs to. This is required to find the manifest that shows which UPSes power the host. + +=head3 anvil_name (required) + +This is the Anvil! name that the machine is a member of. This is used for logging. + +=head3 host_uuid (required) + +This is the UUID of the host whose UPSes are being checked. + +=head3 host_name (required) + +This is the host's name that we're checking. This is used for logging. + +=cut +sub check_power +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->check_power()" }}); + + my $anvil_uuid = defined $parameter->{anvil_uuid} ? $parameter->{anvil_uuid} : ""; + my $anvil_name = defined $parameter->{anvil_name} ? $parameter->{anvil_name} : ""; + my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : ""; + my $host_name = defined $parameter->{host_name} ?
$parameter->{host_name} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + anvil_uuid => $anvil_uuid, + anvil_name => $anvil_name, + host_uuid => $host_uuid, + host_name => $host_name, + }}); + + if ((not $anvil_uuid) or (not $anvil_name) or (not $host_uuid) or (not $host_name)) + { + # Woops + return("!!error!!"); + } + + # We'll need the UPS data + $anvil->Database->get_upses({debug => $debug}); + + my $power_health = 0; + my $shorted_time_on_batteries = 99999; + my $highest_charge_percentage = 0; + my $estimated_hold_up_time = 0; + + my $query = "SELECT manifest_uuid FROM manifests WHERE manifest_name = ".$anvil->Database->quote($anvil_name).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + if (not $count) + { + # Nothing we can do. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0569", variables => { + anvil_name => $anvil_name, + host_name => $host_name, + }}); + return($power_health, $shorted_time_on_batteries, $highest_charge_percentage, $estimated_hold_up_time) + } + + my $manifest_uuid = $results->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { manifest_uuid => $manifest_uuid }}); + + # Try to parse the manifest now. 
+ if (not exists $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}) + { + my $problem = $anvil->Striker->load_manifest({ + debug => $debug, + manifest_uuid => $manifest_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + + if ($problem) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0569", variables => { + manifest_uuid => $manifest_uuid, + anvil_name => $anvil_name, + host_name => $host_name, + }}); + return($power_health, $shorted_time_on_batteries, $highest_charge_percentage, $estimated_hold_up_time) + } + } + + # If we're here, we can now look for the PDUs powering this host. + my $ups_count = 0; + my $ups_with_mains_found = 0; + foreach my $machine_type (sort {$a cmp $b} keys %{$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}}) + { + my $machine_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine_type}{name}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + machine_type => $machine_type, + machine_name => $machine_name, + }}); + next if $host_name !~ /$machine_name/; + + foreach my $ups_name (sort {$a cmp $b} keys %{$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine_type}{ups}}) + { + my $ups_uuid = $anvil->data->{upses}{ups_name}{$ups_name}{ups_uuid}; + my $ups_used = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine_type}{ups}{$ups_name}{used}; + my $power_uuid = $anvil->data->{upses}{ups_name}{$ups_name}{power_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + ups_name => $ups_name, + ups_uuid => $ups_uuid, + power_uuid => $power_uuid, + }}); + + if (($ups_used) && ($power_uuid)) + { + ### TODO: The power 'modified_time' is in unixtime. So we can see when the + ### UPS was last scanned. 
Later, we should think about how valid we + ### consider data over a certain age. + # What state is the UPS in? + $ups_count++; + my $power_on_battery = $anvil->data->{power}{power_uuid}{$power_uuid}{power_on_battery}; + my $power_seconds_left = $anvil->data->{power}{power_uuid}{$power_uuid}{power_seconds_left}; + my $power_charge_percentage = $anvil->data->{power}{power_uuid}{$power_uuid}{power_charge_percentage}; + my $modified_date_unix = $anvil->data->{power}{power_uuid}{$power_uuid}{modified_date_unix}; + my $time_now = time; + my $last_updated = $time_now - $modified_date_unix; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + ups_count => $ups_count, + power_on_battery => $power_on_battery, + power_seconds_left => $power_seconds_left." (".$anvil->Convert->time({'time' => $power_seconds_left, long => 1, translate => 1}).")", + power_charge_percentage => $power_charge_percentage."%", + modified_date_unix => $modified_date_unix, + time_now => $time_now, + last_updated => $last_updated." (".$anvil->Convert->time({'time' => $last_updated, long => 1, translate => 1}).")", + }}); + + if ($power_on_battery) + { + # We're on battery, so see what the hold up time is. + if (not $power_health) + { + # Set this to '2', if another UPS is on mains, it will change it to 1. + $power_health = 2; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { power_health => $power_health }}); + } + if ($power_seconds_left > $estimated_hold_up_time) + { + $estimated_hold_up_time = $power_seconds_left; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { estimated_hold_up_time => $estimated_hold_up_time }}); + } + + # How long has it been on batteries? + my $query = " +SELECT + round(extract(epoch from modified_date)) +FROM + history.power +WHERE + power_uuid = ".$anvil->Database->quote($power_uuid)." 
+AND + power_on_battery IS FALSE +ORDER BY + modified_date DESC +LIMIT 1 +;"; + my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + if (not $count) + { + # The only way this could happen is if we've never seen the UPS on mains... + $shorted_time_on_batteries = 0; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0571", variables => { + power_uuid => $power_uuid, + host_name => $host_name, + }}); + } + else + { + my $time_on_batteries = $results->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + time_on_batteries => $time_on_batteries." (".$anvil->Convert->time({'time' => $time_on_batteries, long => 1, translate => 1}).")", + }}); + + if ($time_on_batteries < $shorted_time_on_batteries) + { + $shorted_time_on_batteries = $shorted_time_on_batteries; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + shorted_time_on_batteries => $shorted_time_on_batteries." (".$anvil->Convert->time({'time' => $shorted_time_on_batteries, long => 1, translate => 1}).")", + }}); + } + } + } + else + { + # See how charged up this UPS is. + $power_health = 1; + $ups_with_mains_found = 1; + $shorted_time_on_batteries = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + ups_with_mains_found => $ups_with_mains_found, + shorted_time_on_batteries => $shorted_time_on_batteries, + }}); + + if ($power_charge_percentage > $highest_charge_percentage) + { + $highest_charge_percentage = $power_charge_percentage; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { highest_charge_percentage => $highest_charge_percentage }}); + } + } + } + } + } + + if ($ups_count) + { + # No UPSes found. 
+ $shorted_time_on_batteries = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shorted_time_on_batteries => $shorted_time_on_batteries }}); + } + + return($power_health, $shorted_time_on_batteries, $highest_charge_percentage, $estimated_hold_up_time); +} + +=head2 post_scan_analysis + +This method contains the logic for the ScanCore "decision engine". The logic applied depends on the host type. + +This method takes no parameters. + +=cut +sub post_scan_analysis +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->post_scan_analysis()" }}); + + my $host_type = $anvil->Get->host_type; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_type => $host_type }}); + if ($host_type eq "striker") + { + $anvil->ScanCore->post_scan_analysis_striker({debug => $debug}) + } + elsif ($host_type eq "node") + { + $anvil->ScanCore->post_scan_analysis_node({debug => $debug}) + } + elsif ($host_type eq "dr") + { + $anvil->ScanCore->post_scan_analysis_dr({debug => $debug}) + } + + return(0); +} + + +=head2 post_scan_analysis_dr + +This runs through ScanCore post-scan analysis on DR hosts. + +This method takes no parameters; + +=cut +sub post_scan_analysis_dr +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->post_scan_analysis_dr()" }}); + + + + return(0); +} + + +=head2 post_scan_analysis_node + +This runs through ScanCore post-scan analysis on Anvil! nodes. 

This method takes no parameters;

=cut
sub post_scan_analysis_node
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->post_scan_analysis_node()" }});
	
	
	
	return(0);
}


=head2 post_scan_analysis_striker

This runs through ScanCore post-scan analysis on Striker dashboards.

For each node and DR host that can't be reached over a shared network, the power state is checked using the host's C<< host_ipmi >> fence call. If the host is off, the recorded C<< system::stop_reason >> decides what happens next;

* C<< user >> (or no reason set) - Nothing is done.
* C<< power >> - The host is booted if C<< ScanCore->check_power >> reports that at least one UPS is on mains and the highest charge percentage is at or above C<< scancore::power::safe_boot_percentage >> (default C<< 35 >>).
* C<< thermal >> - The host's BMC is queried and, if at least one temperature sensor is found and all temperature sensors are C<< ok >>, the host is booted.

This method takes no parameters;

=cut
sub post_scan_analysis_striker
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->post_scan_analysis_striker()" }});
	
	# We only boot nodes and DR hosts. Nodes get booted if 'variable_name = 'system::stop_reason' is set,
	# or when a DR host is scheduled to boot.
	$anvil->Database->get_hosts_info({debug => $debug});
	
	# Get a look at all nodes and DR hosts. For each, check if they're up.
	foreach my $host_uuid (keys %{$anvil->data->{machine}{host_uuid}})
	{
		# Compile data.
		my $host_name  = $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_name};
		my $host_type  = $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_type};
		my $host_key   = $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_key};
		my $host_ipmi  = $anvil->data->{machine}{host_uuid}{$host_uuid}{hosts}{host_ipmi};
		my $password   = $anvil->data->{machine}{host_uuid}{$host_uuid}{password};
		my $anvil_name = $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{name};
		my $anvil_uuid = $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{uuid};
		my $anvil_role = $anvil->data->{machine}{host_uuid}{$host_uuid}{anvil}{role};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
			host_name  => $host_name,
			host_type  => $host_type,
			host_key   => $host_key,
			host_ipmi  => $anvil->Log->is_secure($host_ipmi),
			password   => $anvil->Log->is_secure($password),
			anvil_name => $anvil_name,
			anvil_uuid => $anvil_uuid,
			anvil_role => $anvil_role,
		}});
		
		### TODO: Add an ability to mark which PDU powers a striker. If set, try logging into the 
		###       peer striker and if it fails, power cycle it (but only once per hour).
		next if $host_type eq "striker";
		
		### TODO: Adding support for PDU resets would allow us to recover from crashed IPMI BMCs as
		###       well. For now though, no 'host_ipmi' means there's nothing we can do.
		if (not $host_ipmi)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0560", variables => { host_name => $host_name }});
			next;
		}
		
		# Check this target's power state.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0561", variables => { host_name => $host_name }});
		
		# Do we share a network with this system?
		my $check_power = 1;
		my $match       = $anvil->Network->find_matches({
			debug  => $debug,
			first  => $anvil->Get->host_uuid,
			second => $host_uuid,
		});
		# Only dereference the result if we actually got a hash reference back.
		my $matched_ips = ref($match) eq "HASH" ? scalar(keys %{$match}) : 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { matched_ips => $matched_ips }});
		if (not $matched_ips)
		{
			# nothing we can do with this host.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0558", variables => { host_name => $host_name }});
			next;
		}
		
		# Try each shared interface. If the host answers pings and we can log in, it's up and there is
		# no need to look at its power state.
		foreach my $interface (sort {$a cmp $b} keys %{$match->{$host_uuid}})
		{
			my $ip_address = $match->{$host_uuid}{$interface}{ip};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
				's1:interface'  => $interface,
				's2:ip_address' => $ip_address,
			}});
			
			# Can we access the machine?
			my ($pinged, $average_time) = $anvil->Network->ping({
				debug => $debug,
				count => 3,
				ping  => $ip_address,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				pinged       => $pinged,
				average_time => $average_time,
			}});
			next if not $pinged;
			
			my $access = $anvil->Remote->test_access({
				debug    => $debug,
				target   => $ip_address,
				user     => "root",
				password => $password,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
			if ($access)
			{
				# It's up.
				$check_power = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_power => $check_power }});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0562", variables => { host_name => $host_name }});
				last;
			}
		}
		
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_power => $check_power }});
		if (not $check_power)
		{
			next;
		}
		
		# Do we have IPMI info?
		if (not $host_ipmi)
		{
			### TODO: Add support for power-cycling a target using PDUs. Until this, this
			###       will never be hit as we next on no host_ipmi, but will be useful
			###       when PDU support is added.
			# Nothing we can do (for now)
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0559", variables => { host_name => $host_name }});
			next;
		}
		
		# Check the power state. Strip any existing status action, append our own, and tidy up the 
		# white space left behind.
		my $shell_call =  $host_ipmi;
		   $shell_call =~ s/--action status//;
		   $shell_call =~ s/-o status//;
		   $shell_call .= " --action status";
		   $shell_call =~ s/\s+--action/ --action/;
		
		# This is the same call, but asking the BMC to power the host on. It is only used if we decide
		# below that it is safe to boot the host.
		my $boot_call =  $shell_call;
		   $boot_call =~ s/\s*--action status/ --action on/;
		
		my ($output, $return_code) = $anvil->System->call({debug => $debug, timeout => 30, shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code }});
		foreach my $line (split/\n/, $output)
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
		}
		
		if ($return_code eq "2")
		{
			# Node is off.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0564", variables => { host_name => $host_name }});
		}
		elsif ($return_code eq "0")
		{
			# Node is on.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0563", variables => { host_name => $host_name }});
			next;
		}
		else
		{
			# The status call neither reported 'on' nor 'off' (timeout, BMC unreachable, etc), so
			# we don't know the power state. Don't try to boot a host in an unknown state.
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
				host_name   => $host_name,
				return_code => $return_code,
			}});
			next;
		}
		
		# Still here? See if we know why the node is off.
		my $stop_reason = "unknown";
		my $query       = "
SELECT 
    variable_value 
FROM 
    variables 
WHERE 
    variable_name         = 'system::stop_reason' 
AND 
    variable_source_table = 'hosts' 
AND 
    variable_source_uuid  = ".$anvil->Database->quote($host_uuid)." 
;";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
		my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
		my $count   = @{$results};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			results => $results,
			count   => $count,
		}});
		if ($count)
		{
			$stop_reason = $results->[0]->[0];
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { stop_reason => $stop_reason }});
		}
		
		if (not $stop_reason)
		{
			# Nothing to do.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0565", variables => { host_name => $host_name }});
			next;
		}
		elsif ($stop_reason eq "user")
		{
			# Nothing to do.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0566", variables => { host_name => $host_name }});
			next;
		}
		elsif ($stop_reason eq "power")
		{
			# Check now if the power is OK
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0567", variables => { host_name => $host_name }});
			my ($power_health, $shorted_time_on_batteries, $highest_charge_percentage, $estimated_hold_up_time) = $anvil->ScanCore->check_power({
				debug      => $debug,
				anvil_uuid => $anvil_uuid,
				anvil_name => $anvil_name,
				host_uuid  => $host_uuid,
				host_name  => $host_name,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				power_health              => $power_health,
				shorted_time_on_batteries => $shorted_time_on_batteries,
				highest_charge_percentage => $highest_charge_percentage,
				estimated_hold_up_time    => $estimated_hold_up_time,
			}});
			# * 0 - No UPSes found for the host
			# * 1 - One or more UPSes found and at least one has input power from mains.
			# * 2 - One or more UPSes found, all are running on battery.
			if ($power_health eq "1")
			{
				# Power is (at least partially) back. What's the charge percent? If the 
				# configured threshold is missing or not a whole number, fall back to 35%.
				if ((not $anvil->data->{scancore}{power}{safe_boot_percentage}) or ($anvil->data->{scancore}{power}{safe_boot_percentage} =~ /\D/))
				{
					$anvil->data->{scancore}{power}{safe_boot_percentage} = 35;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
						"scancore::power::safe_boot_percentage" => $anvil->data->{scancore}{power}{safe_boot_percentage},
					}});
				}
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					highest_charge_percentage               => $highest_charge_percentage,
					"scancore::power::safe_boot_percentage" => $anvil->data->{scancore}{power}{safe_boot_percentage},
				}});
				if ($highest_charge_percentage >= $anvil->data->{scancore}{power}{safe_boot_percentage})
				{
					# Safe to boot!
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0574", variables => { host_name => $host_name }});
					
					# The call is built from 'host_ipmi', which may hold a password, so it
					# is logged the same way 'host_ipmi' is.
					my ($boot_output, $boot_return_code) = $anvil->System->call({debug => $debug, timeout => 30, shell_call => $boot_call});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						shell_call  => $anvil->Log->is_secure($boot_call),
						output      => $boot_output,
						return_code => $boot_return_code,
					}});
				}
			}
		}
		elsif ($stop_reason eq "thermal")
		{
			# Check now if the temperature is OK.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0568", variables => { host_name => $host_name }});
			
			my ($ipmitool_command, $ipmi_password) = $anvil->Convert->fence_ipmilan_to_ipmitool({
				debug                 => $debug,
				fence_ipmilan_command => $host_ipmi,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				ipmitool_command => $ipmitool_command,
				ipmi_password    => $anvil->Log->is_secure($ipmi_password),
			}});
			
			if ((not $ipmitool_command) or ($ipmitool_command eq "!!error!!"))
			{
				# No IPMI tool to call.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0573", variables => { host_name => $host_name }});
				next;
			}
			
			# Drop anything recorded for this host on an earlier pass, so that a failed call can't
			# leave us making a boot decision based on stale sensor readings.
			delete $anvil->data->{ipmi}{$host_name};
			my $ipmi_problem = $anvil->System->collect_ipmi_data({
				debug            => $debug,
				host_name        => $host_name,
				ipmitool_command => $ipmitool_command,
				ipmi_password    => $ipmi_password,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmi_problem => $ipmi_problem }});
			
			# Now look for thermal values.
			my $sensor_found    = 0;
			my $temperatures_ok = 1;
			foreach my $sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}})
			{
				my $current_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value};
				my $units         = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units};
				my $status        = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					current_value => $current_value,
					sensor_name   => $sensor_name,
					units         => $units,
					status        => $status,
				}});
				
				# We only care about temperature sensors here.
				next if not defined $units;
				next if $units ne "C";
				
				if (not $sensor_found)
				{
					# We've found at least one temperature sensor.
					$sensor_found = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_found => $sensor_found }});
				}
				
				if ((not defined $status) or ($status ne "ok"))
				{
					# Sensor isn't OK yet.
					$temperatures_ok = 0;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { temperatures_ok => $temperatures_ok }});
				}
			}
			
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				sensor_found    => $sensor_found,
				temperatures_ok => $temperatures_ok,
			}});
			if (($sensor_found) && ($temperatures_ok))
			{
				### TODO: We'll want to revisit M2's restart cooldown logic. It never 
				###       actually proved useful in M2, but it doesn't mean it wouldn't help
				###       in the right situation.
				# Safe to boot!
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0575", variables => { host_name => $host_name }});
				
				# As above, the call may hold a password so it is logged as secure.
				my ($boot_output, $boot_return_code) = $anvil->System->call({debug => $debug, timeout => 30, shell_call => $boot_call});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					shell_call  => $anvil->Log->is_secure($boot_call),
					output      => $boot_output,
					return_code => $boot_return_code,
				}});
			}
		}
	}
	
	return(0);
}

# =head3
# 
# Private Functions;
# 
# =cut

#############################################################################################################
# Private functions                                                                                         #
#############################################################################################################

# This looks in the passed-in directory for scan agents or sub-directories (which will in turn be scanned).
sub _scan_directory
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->_scan_directory()" }});
	
	my $directory = defined $parameter->{directory} ?
$parameter->{directory} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + directory => $directory, + }}); + + if (not $directory) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "ScanCore->_scan_directory()", parameter => "directory" }}); + return("!!error!!"); + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { directory => $directory }}); + local(*DIRECTORY); + opendir(DIRECTORY, $directory); + while(my $file = readdir(DIRECTORY)) + { + next if $file eq "."; + next if $file eq ".."; + my $full_path = $directory."/".$file; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + file => $file, + full_path => $full_path, + }}); + # If we're looking at a directory, scan it. Otherwise, see if it's an executable and that it + # starts with 'scan-*'. + if (-d $full_path) + { + # This is a directory, dive into it. + $anvil->ScanCore->_scan_directory({directory => $full_path}); + } + elsif (-x $full_path) + { + # Now I only want to know if the file starts with 'scan-' + next if $file !~ /^scan-/; + + # If I am still alive, I am looking at a scan agent! 
+ $anvil->data->{scancore}{agent}{$file} = $full_path; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "scancore::agent::${file}" => $anvil->data->{scancore}{agent}{$file}, + }}); + } + } + closedir(DIRECTORY); + + return(0); +} + + 1; diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index 16417e50..8be8440e 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -27,6 +27,7 @@ my $THIS_FILE = "System.pm"; # check_ssh_keys # check_memory # check_storage +# collect_ipmi_data # configure_ipmi # disable_daemon # enable_daemon @@ -1196,6 +1197,309 @@ sub check_storage return(0); } + +=head2 collect_ipmi_data + +This takes an C<< ipmitool >> command (for access, not including ending command or password!) and calls thae target IPMI BMC. The returned data is collected and parsed. + +If failed to access, C<< 1 >> is returned. If there is a problem, C<< !!error!! >> is returned. If data is collected, C<< 0 >> is returned. + +Recorded data is stored as: + + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_value_sensor_value + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_units + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_status + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_high_critical + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_high_warning + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_low_critical + ipmi::::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_low_warning + +parameters; + +=head3 host_name (required) + +This is the name used to store the target's information. Generally, this should be the C<< host_name >> value for the target machine, as stored in C<< hosts >>. + +=head3 ipmitool_command (required) + +This is the C<< ipmitool >> command used to authenticate against and access the target BMC. 
This must not contain the password, or the command to run on the BMC. Those parts are handled by this method. + +=head3 ipmi_password (optional) + +If the target BMC requires a password (and they usually do...), the password will be written to a temporary file, and C<< -f >> will be used as part of the final C<< ipmitool >> command call. As soon as the call returns, the temp file is deleted. + +=cut +sub collect_ipmi_data +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "System->collect_ipmi_data()" }}); + + my $host_name = defined $parameter->{host_name} ? $parameter->{host_name} : ""; + my $ipmitool_command = defined $parameter->{ipmitool_command} ? $parameter->{ipmitool_command} : ""; + my $ipmi_password = defined $parameter->{ipmi_password} ? $parameter->{ipmi_password} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + host_name => $host_name, + ipmitool_command => $ipmitool_command, + ipmi_password => $anvil->Log->is_secure($ipmi_password), + }}); + + if (not $host_name) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Systeme->collect_ipmi_data()", parameter => "host_name" }}); + return('!!error!!'); + } + if (not $ipmitool_command) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Systeme->collect_ipmi_data()", parameter => "ipmitool_command" }}); + return('!!error!!'); + } + + my $read_start_time = time; + + # If there is a password, write it to a temp file. + my $problem = 1; + my $temp_file = ""; + if ($ipmi_password) + { + # Write the password to a temp file. 
+ $temp_file = "/tmp/scancore.".$anvil->Get->uuid({short => 1}); + $anvil->Storage->write_file({ + body => $ipmi_password, + secure => 1, + file => $temp_file, + overwrite => 1, + }); + } + + # Call with a timeout in case the call hangs. + my $shell_call = $ipmitool_command." sensor list all"; + if ($ipmi_password) + { + $shell_call = $ipmitool_command." -f ".$temp_file." sensor list all"; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({timeout => 30, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + + # Delete the temp file. + unlink $temp_file; + + my $temp_count = 1; + foreach my $line (split/\n/, $output) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ">> line" => $line }}); + + # Clean up the output + $line =~ s/^\s+//; + $line =~ s/\s+$//; + $line =~ s/\s+\|/|/g; + $line =~ s/\|\s+/|/g; + + ### TODO: If we determine that the IPMI BMC is hung, set the health to '10' + ### $anvil->data->{'scan-ipmitool'}{health}{new}{'ipmi:bmc_controller'} = 10; + # Catch errors: + if ($line =~ /Activate Session command failed/) + { + # Failed to connect. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_log_0002", variables => { + host_name => $host_name, + call => $ipmitool_command, + }}); + } + next if $line !~ /\|/; + + if ($problem) + { + $problem = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {problem => $problem }}); + } + + # high fail -------------------------------------. + # high critical ---------------------------------. | + # high warning -----------------------------. | | + # low warning -------------------------. | | | + # low critical ---------------------. 
| | | | + # low fail -----------------. | | | | | + # status -------------. | | | | | | + # units ---------. | | | | | | | + # current value -----. | | | | | | | | + # sensor name -. | | | | | | | | | + # Columns: | | | | | | | | | | + # x | x | x | x | x | x | x | x | x | x + my ($sensor_name, + $current_value, + $units, + $status, + $low_fail, + $low_critical, + $low_warning, + $high_warning, + $high_critical, + $high_fail) = split /\|/, $line; + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + sensor_name => $sensor_name, + current_value => $current_value, + units => $units, + status => $status, + low_fail => $low_fail, + low_critical => $low_critical, + low_warning => $low_warning, + high_warning => $high_warning, + high_critical => $high_critical, + high_fail => $high_fail, + }}); + + next if not $sensor_name; + next if not $status; + next if not $units; + next if $units =~ /discrete/; + + $units = "C" if $units =~ /degrees C/i; + $units = "F" if $units =~ /degrees F/i; + $units = "%" if $units =~ /percent/i; + $units = "W" if $units =~ /watt/i; + $units = "V" if $units =~ /volt/i; + + # The BBU and RAID Controller, as reported by IPMI, is flaky and redundant. We + # monitor it via storcli/hpacucli (or OEM variant of), so we ignore it here. + next if $sensor_name eq "BBU"; + next if $sensor_name eq "RAID Controller"; + + # HP seems to stick 'XX-' in front of some sensor names. + $sensor_name =~ s/^\d\d-//; + + # Single PSU hosts often call their PSU just that, without a suffix integer. We'll + # add '1' in such cases. + if ($sensor_name eq "PSU") + { + $sensor_name = "PSU1"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); + } + + # Dells have two sensors called simply "Temp". 
+ if ($sensor_name eq "Temp") + { + $sensor_name = "Temp".$temp_count++; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); + } + + # Thresholds that are 'na' need to be converted to numeric + $low_fail = -99 if $low_fail eq "na"; + $low_critical = -99 if $low_critical eq "na"; + $low_warning = -99 if $low_warning eq "na"; + $high_warning = 999 if $high_warning eq "na"; + $high_critical = 999 if $high_critical eq "na"; + $high_fail = 999 if $high_fail eq "na"; + + # Values in the DB that are 'double precision' must be '' if not set. + $current_value = '' if not $current_value; + $low_fail = '' if not $low_fail; + $low_critical = '' if not $low_critical; + $low_warning = '' if not $low_warning; + $high_warning = '' if not $high_warning; + $high_critical = '' if not $high_critical; + $high_fail = '' if not $high_fail; + + # Some values list 'inf' on some machines (HP...). Convert these to ''. + $current_value = '' if $current_value eq "inf"; + $low_fail = '' if $low_fail eq "inf"; + $low_critical = '' if $low_critical eq "inf"; + $low_warning = '' if $low_warning eq "inf"; + $high_warning = '' if $high_warning eq "inf"; + $high_critical = '' if $high_critical eq "inf"; + $high_fail = '' if $high_fail eq "inf"; + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + sensor_name => $sensor_name, + current_value => $current_value, + units => $units, + status => $status, + low_fail => $low_fail, + low_critical => $low_critical, + low_warning => $low_warning, + high_warning => $high_warning, + high_critical => $high_critical, + high_fail => $high_fail, + }}); + + if ($units eq "F") + { + # Convert to 'C' + $high_critical = $anvil->Convert->fahrenheit_to_celsius({temperature => $high_critical}) if $high_critical ne ""; + $high_warning = $anvil->Convert->fahrenheit_to_celsius({temperature => $high_warning}) if $high_warning ne ""; + $low_critical = 
$anvil->Convert->fahrenheit_to_celsius({temperature => $low_critical}) if $low_critical ne ""; + $low_warning = $anvil->Convert->fahrenheit_to_celsius({temperature => $low_warning}) if $low_warning ne ""; + $units = "C"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + low_critical => $low_critical, + low_warning => $low_warning, + high_warning => $high_warning, + high_critical => $high_critical, + units => $units, + }}); + } + + ### TODO: It looks like the PSU state and the PSU temperature are called, simply, + ### 'PSUx'... If so, change the temperature to 'PSUx Temperature' + if (($units eq "C") && ($sensor_name =~ /^PSU\d/i)) + { + $sensor_name .= " Temperature"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); + } + + # Similarly, 'PSUx Power' is used for power status and wattage.... + if (($units eq "W") && ($sensor_name =~ /PSU\d Power/i)) + { + $sensor_name =~ s/Power/Wattage/; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); + } + + # And again, 'FAN PSUx' is used for both RPM and state... 
+ if (($units eq "RPM") && ($sensor_name =~ /^FAN PSU\d/i)) + { + $sensor_name .= " RPM"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); + } + + # Record + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value} = $current_value; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units} = $units; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status} = $status; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_critical} = $high_critical; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_warning} = $high_warning; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_critical} = $low_critical; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_warning} = $low_warning; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_value_sensor_value" => $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_units" => $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_status" => $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_high_critical" => 
$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_critical}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_high_warning" => $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_warning}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_low_critical" => $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_critical}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_low_warning" => $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_warning}, + }}); + } + + # Record how long it took. + my $sensor_read_time = $anvil->Convert->time({'time' => (time - $read_start_time)}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_log_0003", variables => { + host_name => $host_name, + 'time' => $sensor_read_time + }}); + + + return($problem); +} + + =head2 configure_ipmi This uses the host information along with the Anvil! the host is in to find and configure the local IPMI BMC. diff --git a/scancore-agents/scan-apc-pdu/scan-apc-pdu b/scancore-agents/scan-apc-pdu/scan-apc-pdu index b0842974..8e73bc59 100755 --- a/scancore-agents/scan-apc-pdu/scan-apc-pdu +++ b/scancore-agents/scan-apc-pdu/scan-apc-pdu @@ -1889,8 +1889,11 @@ sub gather_pdu_data } # Can I ping it? This returns '1' if it was pingable, '0' if not. 
- my ($pinged) = $anvil->Network->ping({ping => $pdu_ip}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pinged => $pinged }}); + my ($pinged, $average_time) = $anvil->Network->ping({ping => $pdu_ip}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + pinged => $pinged, + average_time => $average_time, + }}); if (not $pinged) { diff --git a/scancore-agents/scan-apc-ups/scan-apc-ups b/scancore-agents/scan-apc-ups/scan-apc-ups index 989723e4..d3256764 100755 --- a/scancore-agents/scan-apc-ups/scan-apc-ups +++ b/scancore-agents/scan-apc-ups/scan-apc-ups @@ -1496,7 +1496,7 @@ SELECT FROM scan_apc_ups_input WHERE - scan_apc_ups_uuid = ".$anvil->Database->quote($scan_apc_ups_uuid)." + scan_apc_ups_input_scan_apc_ups_uuid = ".$anvil->Database->quote($scan_apc_ups_uuid)." AND scan_apc_ups_input_1m_minimum_input_voltage < ".$low_limit." ORDER BY @@ -2092,8 +2092,11 @@ sub gather_ups_data } # Can I ping it? This returns '1' if it was pingable, '0' if not. - my ($pinged) = $anvil->Network->ping({ping => $ups_ip}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pinged => $pinged }}); + my ($pinged, $average_time) = $anvil->Network->ping({ping => $ups_ip}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + pinged => $pinged, + average_time => $average_time, + }}); if (not $pinged) { diff --git a/scancore-agents/scan-ipmitool/scan-ipmitool b/scancore-agents/scan-ipmitool/scan-ipmitool index 1ec5a5f5..f66810ec 100755 --- a/scancore-agents/scan-ipmitool/scan-ipmitool +++ b/scancore-agents/scan-ipmitool/scan-ipmitool @@ -15,6 +15,8 @@ # 255 - The host's UUID isn't in the hosts table yet, ScanCore itself hasn't been run. # # TODO: +# - Don't bother scanning other hosts.... ScanCore does direct calls to decide if/when to reboot an offline +# node. 
# - Decide if we should parse 'ipmitool sel list' # - Detect a hung BMC by trying to talk to ourselves and, if that fails, send 'ipmitool bmc reset cold'. # Possibly try pinging the IPMI from the peer as it is not always possible to ping our own interface when @@ -113,7 +115,7 @@ $anvil->data->{'scan-ipmitool'} = { disable => 0, # It will be marked as 'clear' when the temperature drops this many °C below the # critical temperature. - machine => {}, + host_name => {}, alert_sort => 2, # These are used when no other limits are set for a given sensor. thresholds => { @@ -309,26 +311,26 @@ sub find_changes { my ($anvil) = @_; - # Loop through each machine - foreach my $machine (sort {$a cmp $b} keys %{$anvil->data->{ipmi}}) + # Loop through each host_name + foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}}) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { machine => $machine }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_name => $host_name }}); - # This returns the number of read sensors already in the DB for this machine recorded by us + # This returns the number of read sensors already in the DB for this host_name recorded by us # previously. - if (read_last_scan($anvil, $machine)) + if (read_last_scan($anvil, $host_name)) { - ### Existing machine, UPDATE or INSERT as needed. - foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}}) + ### Existing host_name, UPDATE or INSERT as needed. 
+ foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}}) { # Put the new values into variables - my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; - my $new_scan_ipmitool_sensor_units = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; - my $new_scan_ipmitool_sensor_status = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; - my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; - my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; - my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; - my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; + my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; + my $new_scan_ipmitool_sensor_units = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; + my $new_scan_ipmitool_sensor_status = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; + my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; + my 
$new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; + my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; + my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_scan_ipmitool_value_sensor_value => $new_scan_ipmitool_value_sensor_value, new_scan_ipmitool_sensor_units => $new_scan_ipmitool_sensor_units, @@ -342,7 +344,7 @@ sub find_changes # If the new value is 'na', we failed to read it. Skip. if ($new_scan_ipmitool_value_sensor_value eq "na") { - delete $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; next; } @@ -350,8 +352,8 @@ sub find_changes if (not $new_scan_ipmitool_sensor_units) { my $variables = { - machine => $machine, - sensor => $scan_ipmitool_sensor_name, + host_name => $host_name, + sensor => $scan_ipmitool_sensor_name, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_ipmitool_message_0002", variables => $variables}); $anvil->Alert->register({alert_level => "notice", message => "scan_ipmitool_message_0002", variables => $variables, set_by => $THIS_FILE, sort_position => $anvil->data->{'scan-ipmitool'}{alert_sort}++}); @@ -360,8 +362,8 @@ sub find_changes if (not $new_scan_ipmitool_value_sensor_value) { my $variables = { - machine => $machine, - sensor => $scan_ipmitool_sensor_name, + host_name => $host_name, + sensor => $scan_ipmitool_sensor_name, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_ipmitool_message_0003", 
variables => $variables}); $anvil->Alert->register({alert_level => "notice", message => "scan_ipmitool_message_0003", variables => $variables, set_by => $THIS_FILE, sort_position => $anvil->data->{'scan-ipmitool'}{alert_sort}++}); @@ -377,20 +379,20 @@ sub find_changes # Have I seen this sensor before? $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "ref(sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name})" => ref($anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}), + "ref(sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name})" => ref($anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}), }}); - if (ref($anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name})) + if (ref($anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name})) { ### Existing record, update it if needed. # Put the old values into variables - my $scan_ipmitool_uuid = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid}; - my $old_scan_ipmitool_value_sensor_value = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; - my $old_scan_ipmitool_sensor_units = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; - my $old_scan_ipmitool_sensor_status = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; - my $old_scan_ipmitool_sensor_high_critical = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; - my $old_scan_ipmitool_sensor_high_warning = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; - my 
$old_scan_ipmitool_sensor_low_critical = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; - my $old_scan_ipmitool_sensor_low_warning = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; + my $scan_ipmitool_uuid = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid}; + my $old_scan_ipmitool_value_sensor_value = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; + my $old_scan_ipmitool_sensor_units = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; + my $old_scan_ipmitool_sensor_status = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; + my $old_scan_ipmitool_sensor_high_critical = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; + my $old_scan_ipmitool_sensor_high_warning = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; + my $old_scan_ipmitool_sensor_low_critical = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; + my $old_scan_ipmitool_sensor_low_warning = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_ipmitool_uuid => $scan_ipmitool_uuid, old_scan_ipmitool_value_sensor_value => $old_scan_ipmitool_value_sensor_value, @@ -436,7 +438,7 @@ sub find_changes # # We loop out now because the rest of the values will look # # really bad (ie: temps down to 0c) and we don't want to # # trigger 
preventative actions on bad data. -# delete $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; +# delete $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; # next; # } # elsif (($new_scan_ipmitool_sensor_status ne "na") && ($old_scan_ipmitool_sensor_status eq "na")) @@ -457,7 +459,7 @@ sub find_changes { # Update (no surprise ...) $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, sensor_name => $sensor_name, new_scan_ipmitool_value_sensor_value => $new_scan_ipmitool_value_sensor_value, old_scan_ipmitool_value_sensor_value => $old_scan_ipmitool_value_sensor_value, @@ -493,12 +495,12 @@ WHERE if ($new_scan_ipmitool_sensor_units eq "C") { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, sensor_name => $sensor_name, new_scan_ipmitool_value_sensor_value => $new_scan_ipmitool_value_sensor_value, old_scan_ipmitool_value_sensor_value => $old_scan_ipmitool_value_sensor_value, }}); - ($level, $message_key) = process_temperature_change($anvil, $machine, $scan_ipmitool_sensor_name); + ($level, $message_key) = process_temperature_change($anvil, $host_name, $scan_ipmitool_sensor_name); my $variables = { sensor_name => $sensor_name, @@ -568,8 +570,8 @@ WHERE if (not $new_scan_ipmitool_sensor_units) { my $variables = { - machine => $machine, - sensor => $scan_ipmitool_sensor_name, + host_name => $host_name, + sensor => $scan_ipmitool_sensor_name, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_message_0015", variables => $variables}); $anvil->Alert->register({alert_level => "notice", message => "scan_ipmitool_message_0015", variables => $variables, set_by => $THIS_FILE, show_header => 0, sort_position => $anvil->data->{'scan-ipmitool'}{alert_sort}++}); @@ -588,7 +590,7 @@ SET 
scan_ipmitool_sensor_low_warning = ".$anvil->Database->quote($new_scan_ipmitool_sensor_low_warning).", modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})." WHERE - scan_ipmitool_sensor_host = ".$anvil->Database->quote($machine)." + scan_ipmitool_sensor_host = ".$anvil->Database->quote($host_name)." AND scan_ipmitool_uuid = ".$anvil->Database->quote($scan_ipmitool_uuid)." ;"; @@ -635,17 +637,17 @@ AND # Delete the old key so that I can check to see what sensors # vanished. - delete $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; } else { ### NOTE: If the new value is 'na', we ignore it as it is likely a ### sensor that doesn't actually exist. - if ($anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} eq "") + if ($anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} eq "") { # Ignore it. $anvil->Log->entry({log_level => 3, message_key => "scan_ipmitool_log_0005", variables => { sensor_name => $scan_ipmitool_sensor_name }, file => $THIS_FILE, line => __LINE__}); - delete $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; next; } @@ -654,14 +656,14 @@ AND my $scan_ipmitool_uuid = $anvil->Get->uuid(); # Record the new UUID - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid} = $scan_ipmitool_uuid; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid} = $scan_ipmitool_uuid; ### NOTE: These were added to debug duplicate scan_ipmitool_values entries. 
if (not $new_scan_ipmitool_sensor_units) { my $variables = { - machine => $machine, - sensor => $scan_ipmitool_sensor_name, + host_name => $host_name, + sensor => $scan_ipmitool_sensor_name, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_message_0017", variables => $variables}); $anvil->Alert->register({alert_level => "notice", message => "scan_ipmitool_message_0017", variables => $variables, set_by => $THIS_FILE, sort_position => $anvil->data->{'scan-ipmitool'}{alert_sort}++}); @@ -670,8 +672,8 @@ AND if (not $new_scan_ipmitool_value_sensor_value) { my $variables = { - machine => $machine, - sensor => $scan_ipmitool_sensor_name, + host_name => $host_name, + sensor => $scan_ipmitool_sensor_name, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_message_0018", variables => $variables}); $anvil->Alert->register({alert_level => "notice", message => "scan_ipmitool_message_0018", variables => $variables, set_by => $THIS_FILE, sort_position => $anvil->data->{'scan-ipmitool'}{alert_sort}++}); @@ -697,7 +699,7 @@ INSERT INTO ) VALUES ( ".$anvil->Database->quote($scan_ipmitool_uuid).", ".$anvil->Database->quote($anvil->Get->host_uuid).", - ".$anvil->Database->quote($machine).", + ".$anvil->Database->quote($host_name).", ".$anvil->Database->quote($scan_ipmitool_sensor_name).", ".$anvil->Database->quote($new_scan_ipmitool_sensor_units).", ".$anvil->Database->quote($new_scan_ipmitool_sensor_status).", @@ -738,7 +740,7 @@ INSERT INTO my $message_key = "scan_ipmitool_message_0019"; my $level = "notice"; my $variables = { - machine => $machine, + host_name => $host_name, sensor_name => $sensor_name, sensor_value => $sensor_value, sensor_status => $new_scan_ipmitool_sensor_status, @@ -768,18 +770,18 @@ INSERT INTO } else { - # New machine, INSERT everything. 
- foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}}) + # New host_name, INSERT everything. + foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name }}); ### NOTE: If the new value is 'na', we ignore it as it is likely a sensor ### that doesn't actually exist. - if ($anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} eq "") + if ($anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} eq "") { # Ignore it. $anvil->Log->entry({log_level => 3, message_key => "scan_ipmitool_log_0005", variables => { sensor_name => $scan_ipmitool_sensor_name }, file => $THIS_FILE, line => __LINE__}); - delete $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; next; } @@ -787,15 +789,15 @@ INSERT INTO my $scan_ipmitool_uuid = $anvil->Get->uuid() or $anvil->Alert->error({title_key => "error_title_0020", message_key => "error_message_0024", code => 2, file => $THIS_FILE, line => __LINE__}); # Record it. 
- $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid} = $scan_ipmitool_uuid; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid} = $scan_ipmitool_uuid; - my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; - my $new_scan_ipmitool_sensor_units = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; - my $new_scan_ipmitool_sensor_status = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; - my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; - my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; - my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; - my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; + my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; + my $new_scan_ipmitool_sensor_units = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; + my $new_scan_ipmitool_sensor_status = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; + my $new_scan_ipmitool_sensor_high_critical = 
$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; + my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; + my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; + my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_ipmitool_uuid => $scan_ipmitool_uuid, new_scan_ipmitool_sensor_units => $new_scan_ipmitool_sensor_units, @@ -809,7 +811,7 @@ INSERT INTO # If the new value is 'na', we failed to read it. Skip. if ($new_scan_ipmitool_value_sensor_value eq "na") { - delete $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}; next; } @@ -843,7 +845,7 @@ INSERT INTO ".$anvil->Database->quote($scan_ipmitool_uuid).", ".$anvil->Database->quote($anvil->Get->host_uuid).", ".$anvil->Database->quote($scan_ipmitool_sensor_name).", - ".$anvil->Database->quote($machine).", + ".$anvil->Database->quote($host_name).", ".$anvil->Database->quote($new_scan_ipmitool_sensor_units).", ".$anvil->Database->quote($new_scan_ipmitool_sensor_status).", ".$anvil->Database->quote($new_scan_ipmitool_sensor_high_critical).", @@ -892,7 +894,7 @@ INSERT INTO } my $variables = { - machine => $machine, + host_name => $host_name, sensor => $scan_ipmitool_sensor_name, sensor_value => $sensor_value, sensor_status => $new_scan_ipmitool_sensor_status, @@ -909,7 +911,7 @@ INSERT INTO } # If I am scanning myself and if I see problems, I will set the health accordingly. 
- if (($machine eq $anvil->Get->host_name) or ($machine eq $anvil->Get->short_host_name)) + if (($host_name eq $anvil->Get->host_name) or ($host_name eq $anvil->Get->short_host_name)) { $anvil->data->{sys}{scanning_myself} = 1; } @@ -920,10 +922,10 @@ INSERT INTO $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::scanning_myself" => $anvil->data->{sys}{scanning_myself} }}); ### Now add, update and delete 'temperature' entries. - process_temperature($anvil, $machine); + process_temperature($anvil, $host_name); # Now look for any sensors that are in a bad state and set the health accordingly. - check_sensor_health($anvil, $machine); + check_sensor_health($anvil, $host_name); } # Now commit the changes. @@ -936,19 +938,19 @@ INSERT INTO # health table as needed for sensors out of scope. sub check_sensor_health { - my ($anvil, $machine) = @_; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { machine => $machine }}); + my ($anvil, $host_name) = @_; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_name => $host_name }}); - foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}}) + foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}}) { # Put the new values into variables - my $scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; - my $scan_ipmitool_sensor_units = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; - my $scan_ipmitool_sensor_status = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; - my $scan_ipmitool_sensor_high_critical = 
$anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; - my $scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; - my $scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; - my $scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; + my $scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; + my $scan_ipmitool_sensor_units = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}; + my $scan_ipmitool_sensor_status = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}; + my $scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; + my $scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; + my $scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; + my $scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, scan_ipmitool_value_sensor_value => $scan_ipmitool_value_sensor_value, @@ -1027,8 +1029,8 @@ sub 
check_sensor_health # This takes the temperature sensors and feeds them into the 'temperature' table, deleting stale entries as needed. sub process_temperature { - my ($anvil, $machine) = @_; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { machine => $machine }}); + my ($anvil, $host_name) = @_; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_name => $host_name }}); $anvil->data->{'scan-ipmitool'}{queries} = []; @@ -1049,7 +1051,7 @@ WHERE AND temperature_agent_name = ".$anvil->Database->quote($THIS_FILE)." AND - temperature_sensor_host = ".$anvil->Database->quote($machine)." + temperature_sensor_host = ".$anvil->Database->quote($host_name)." ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); @@ -1058,27 +1060,27 @@ AND foreach my $row (@{$results}) { my $scan_ipmitool_sensor_name = $row->[1]; - $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid} = $row->[0]; - $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c} = $row->[2]; - $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight} = $row->[3]; - $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state} = $row->[4]; - $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is} = $row->[5]; + $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid} = $row->[0]; + $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c} = $row->[2]; + $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight} = $row->[3]; + $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state} = 
$row->[4]; + $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is} = $row->[5]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "old::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_uuid" => $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}, - "old::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "old::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_weight" => $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight}, - "old::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "old::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "old::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_uuid" => $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}, + "old::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "old::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_weight" => $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight}, + "old::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "old::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } # Look at the 
new values. - foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{new}{$machine}{temperature}}) + foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{new}{$host_name}{temperature}}) { - my $new_temperature_uuid = $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}; - my $new_temperature_value_c = $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}; - my $new_temperature_state = $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}; - my $new_temperature_is = $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}; + my $new_temperature_uuid = $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}; + my $new_temperature_value_c = $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}; + my $new_temperature_state = $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}; + my $new_temperature_is = $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, new_temperature_uuid => $new_temperature_uuid, @@ -1089,7 +1091,7 @@ AND if ($new_temperature_value_c eq "na") { - delete $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}; next; } @@ -1100,9 +1102,9 @@ AND { # What weight will we apply to this sensor? 
$new_temperature_weight = $anvil->data->{'scan-ipmitool'}{thresholds}{'default'}{weight}; - if (exists $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight}) + if (exists $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight}) { - $new_temperature_weight = $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight}; + $new_temperature_weight = $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_weight}; } $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_temperature_weight => $new_temperature_weight }}); if (exists $anvil->data->{'scan-ipmitool'}{thresholds}{$scan_ipmitool_sensor_name}{weight}) @@ -1135,17 +1137,17 @@ AND # Now see if the variable was seen before and, if so, if it changed. my $temperature_uuid = ""; - if (ref($anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name})) + if (ref($anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name})) { # Update the existing entry, if needed. - my $temperature_uuid = $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}; + my $temperature_uuid = $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}; } # Generate and store the UUID. 
- my $sensor_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $machine}); + my $sensor_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $host_name}); if (not $sensor_host_uuid) { - $sensor_host_uuid = $machine; + $sensor_host_uuid = $host_name; } $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_host_uuid => $sensor_host_uuid }}); $temperature_uuid = $anvil->Database->insert_or_update_temperature({ @@ -1159,26 +1161,26 @@ AND temperature_value_c => $new_temperature_value_c, temperature_state => $new_temperature_state, temperature_is => $new_temperature_is, - temperature_weight => $new_temperature_weigh, + temperature_weight => $new_temperature_weight, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { temperature_uuid => $temperature_uuid }}); # We still want this value, so delete it from the hash. - if (exists $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}) + if (exists $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}) { - delete $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}; + delete $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}; } - $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid} = $temperature_uuid; + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid} = $temperature_uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_uuid" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_uuid" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}, }}); } # Now, if any undeleted old entries remain, delete them from the database. 
- foreach my $ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{old}{$machine}{temperature}}) + foreach my $scan_ipmitool_sensor_name (sort {$a cmp $b} keys %{$anvil->data->{old}{$host_name}{temperature}}) { - my $temperature_uuid = $anvil->data->{old}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}; + my $temperature_uuid = $anvil->data->{old}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_uuid}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { temperature_uuid => $temperature_uuid }}); $temperature_uuid = $anvil->Database->insert_or_update_temperature({ @@ -1199,19 +1201,19 @@ AND # This logs thermal sensor values that are outside nominal ranges sub log_abnormal_temperatures { - my ($anvil, $machine, $scan_ipmitool_sensor_name) = @_; + my ($anvil, $host_name, $scan_ipmitool_sensor_name) = @_; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, }}); - my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; - my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; - my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; - my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; - my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; + my $new_scan_ipmitool_value_sensor_value = 
$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; + my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; + my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; + my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; + my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, new_scan_ipmitool_value_sensor_value => $new_scan_ipmitool_value_sensor_value, }}); @@ -1303,7 +1305,7 @@ sub log_abnormal_temperatures # Record the levels $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, high_warning => $high_warning, high_critical => $high_critical, @@ -1316,76 +1318,76 @@ sub log_abnormal_temperatures if ($new_scan_ipmitool_value_sensor_value < $low_critical) { # Setup the 'temperature' entry. 
- $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name} = { + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name} = { temperature_value_c => $new_scan_ipmitool_value_sensor_value, temperature_state => 'critical', temperature_is => 'low', }; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } elsif ($new_scan_ipmitool_value_sensor_value < $low_warning) { # Setup the 'temperature' entry. 
- $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name} = { - temperature_value_c => $new_scan_ipmitool_value_sensor_value, - temperature_state => 'warning', - temperature_is => 'low', + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name} = { + temperature_value_c => $new_scan_ipmitool_value_sensor_value, + temperature_state => 'warning', + temperature_is => 'low', }; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } elsif ($new_scan_ipmitool_value_sensor_value > $high_critical) { # Setup the 'temperature' entry. 
- $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name} = { - temperature_value_c => $new_scan_ipmitool_value_sensor_value, - temperature_state => 'critical', - temperature_is => 'high', + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name} = { + temperature_value_c => $new_scan_ipmitool_value_sensor_value, + temperature_state => 'critical', + temperature_is => 'high', }; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } elsif ($new_scan_ipmitool_value_sensor_value > $high_warning) { # Setup the 'temperature' entry. 
- $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name} = { - temperature_value_c => $new_scan_ipmitool_value_sensor_value, - temperature_state => 'warning', - temperature_is => 'high', + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name} = { + temperature_value_c => $new_scan_ipmitool_value_sensor_value, + temperature_state => 'warning', + temperature_is => 'high', }; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } else { - $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name} = { - temperature_value_c => $new_scan_ipmitool_value_sensor_value, - temperature_state => 'ok', - temperature_is => 'nominal', + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name} = { + temperature_value_c => $new_scan_ipmitool_value_sensor_value, + temperature_state => 'ok', + temperature_is => 'nominal', }; 
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } ### NOTE: Recording all temps now, so this might not be needed # When the dashboard scans a node, it needs to know about the Ambient and Systemboard temperatures in - # order to decide whether the node is safe too boot back up or not. So if this machine is a + # order to decide whether the node is safe too boot back up or not. So if this host_name is a # dashboard, log the 'Ambient' and 'Systemboard' temperatures (or whatever the user defined) as # 'good', if they're not already in the 'new::temperature::x' hash. 
my $host_type = $anvil->Get->host_type(); @@ -1398,17 +1400,17 @@ sub log_abnormal_temperatures }}); if (($scan_ipmitool_sensor_name eq $sensor) && ($host_type eq "striker") && - (not exists $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name})) + (not exists $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name})) { - $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name} = { + $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name} = { temperature_value_c => $new_scan_ipmitool_value_sensor_value, temperature_state => 'ok', temperature_is => 'nominal', }; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, - "new::${machine}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$machine}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_value_c" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_value_c}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_state" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_state}, + "new::${host_name}::temperature::${scan_ipmitool_sensor_name}::temperature_is" => $anvil->data->{new}{$host_name}{temperature}{$scan_ipmitool_sensor_name}{temperature_is}, }}); } } @@ -1420,9 +1422,9 @@ sub log_abnormal_temperatures # critical state. 
sub process_temperature_change { - my ($anvil, $machine, $scan_ipmitool_sensor_name) = @_; + my ($anvil, $host_name, $scan_ipmitool_sensor_name) = @_; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, }}); @@ -1436,11 +1438,11 @@ sub process_temperature_change }}); # New values - my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; - my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; - my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; - my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; - my $new_scan_ipmitool_sensor_low_warning = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; + my $new_scan_ipmitool_value_sensor_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}; + my $new_scan_ipmitool_sensor_high_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}; + my $new_scan_ipmitool_sensor_high_warning = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}; + my $new_scan_ipmitool_sensor_low_critical = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}; + my $new_scan_ipmitool_sensor_low_warning = 
$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_scan_ipmitool_value_sensor_value => $new_scan_ipmitool_value_sensor_value, new_scan_ipmitool_sensor_high_critical => $new_scan_ipmitool_sensor_high_critical, @@ -1457,10 +1459,10 @@ sub process_temperature_change } # Old value, if it exists. - my $old_scan_ipmitool_value_sensor_value = $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} ? $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} : 0; + my $old_scan_ipmitool_value_sensor_value = $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} ? $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_value_sensor_value" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}, - old_scan_ipmitool_value_sensor_value => $old_scan_ipmitool_value_sensor_value, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_value_sensor_value" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}, + old_scan_ipmitool_value_sensor_value => $old_scan_ipmitool_value_sensor_value, }}); ### Buffer, used for clearing all alerts. 
@@ -1561,7 +1563,7 @@ sub process_temperature_change # Final levels $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, scan_ipmitool_sensor_name => $scan_ipmitool_sensor_name, high_warning => $high_warning, high_critical => $high_critical, @@ -1594,7 +1596,7 @@ sub process_temperature_change # We've gone critical. If it was previously 'warning', clear them. foreach my $type ("temperature_high_warning", "temperature_low_warning", "temperature_low_critical") { - my $record_locator = $machine.":".$scan_ipmitool_sensor_name.":".$type; + my $record_locator = $host_name.":".$scan_ipmitool_sensor_name.":".$type; my $changed = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $record_locator, set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_locator => $record_locator, @@ -1603,7 +1605,7 @@ sub process_temperature_change } # Set the critical warning. - my $changed = $anvil->Alert->check_alert_sent({record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_high_critical", set_by => $THIS_FILE}); + my $changed = $anvil->Alert->check_alert_sent({record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_high_critical", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changed => $changed }}); # If set, alert the user and register with 'temperature'. @@ -1623,8 +1625,8 @@ sub process_temperature_change { # The temp is rising, so the 'high_critical' should not be set, but check/clear it # anyway to be safe. 
- $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_high_critical", set_by => $THIS_FILE}); - my $changed = $anvil->Alert->check_alert_sent({clear => 0, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_high_warning", set_by => $THIS_FILE}); + $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_high_critical", set_by => $THIS_FILE}); + my $changed = $anvil->Alert->check_alert_sent({clear => 0, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_high_warning", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changed => $changed }}); if ($changed) { @@ -1641,8 +1643,8 @@ sub process_temperature_change { # If there was a 'low_warning' or 'low_critical', clear it and tell the user that # we're OK now. - my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); - my $clear_warning = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_warning", set_by => $THIS_FILE}); + my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); + my $clear_warning = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_warning", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { clear_critical => $clear_critical, clear_warning => $clear_warning, @@ -1663,7 +1665,7 @@ sub process_temperature_change elsif ($new_scan_ipmitool_value_sensor_value > ($low_critical + $buffer)) { # It has risen above critically low levels. 
- my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); + my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { clear_critical => $clear_critical }}); if ($clear_critical) { @@ -1720,7 +1722,7 @@ sub process_temperature_change # We've gone critical. Clear previous alerts... foreach my $type ("temperature_high_critical", "temperature_high_warning", "temperature_low_warning") { - my $record_locator = $machine.":".$scan_ipmitool_sensor_name.":".$type; + my $record_locator = $host_name.":".$scan_ipmitool_sensor_name.":".$type; my $changed = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $record_locator, set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_locator => $record_locator, @@ -1729,7 +1731,7 @@ sub process_temperature_change } # Now set the critical warning. - my $changed = $anvil->Alert->check_alert_sent({record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); + my $changed = $anvil->Alert->check_alert_sent({record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changed => $changed }}); if ($changed) { @@ -1747,8 +1749,8 @@ sub process_temperature_change { # The temp is dropping, so the 'low_critical' should not be set, but check/clear it # anyway to be safe. 
- $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); - my $changed = $anvil->Alert->check_alert_sent({clear => 0, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_warning", set_by => $THIS_FILE}); + $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); + my $changed = $anvil->Alert->check_alert_sent({clear => 0, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_warning", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changed => $changed }}); if ($changed) { @@ -1765,8 +1767,8 @@ sub process_temperature_change { # If there was a 'high_warning' or 'high_critical', clear it and tell the user that # we're OK now. - my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_high_critical", set_by => $THIS_FILE}); - my $clear_warning = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_high_warning", set_by => $THIS_FILE}); + my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_high_critical", set_by => $THIS_FILE}); + my $clear_warning = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_high_warning", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { clear_critical => $clear_critical, clear_warning => $clear_warning, @@ -1787,7 +1789,7 @@ sub process_temperature_change elsif ($new_scan_ipmitool_value_sensor_value < ($high_critical - $buffer)) { # It is below critically high levels. 
- my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $machine.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); + my $clear_critical = $anvil->Alert->check_alert_sent({clear => 1, record_locator => $host_name.":".$scan_ipmitool_sensor_name.":temperature_low_critical", set_by => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { clear_critical => $clear_critical }}); if ($clear_critical) { @@ -1816,13 +1818,13 @@ sub process_temperature_change # This reads in the last scan's data. sub read_last_scan { - my ($anvil, $machine) = @_; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { machine => $machine }}); + my ($anvil, $host_name) = @_; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_name => $host_name }}); - # Make sure I don't have any stray data for this machine. - if (exists $anvil->data->{sql}{$machine}) + # Make sure I don't have any stray data for this host_name. + if (exists $anvil->data->{sql}{$host_name}) { - delete $anvil->data->{sql}{$machine}; + delete $anvil->data->{sql}{$host_name}; } # Read in existing data, if any. @@ -1843,7 +1845,7 @@ FROM WHERE a.scan_ipmitool_uuid = b.scan_ipmitool_value_scan_ipmitool_uuid AND - a.scan_ipmitool_sensor_host = ".$anvil->Database->quote($machine)." + a.scan_ipmitool_sensor_host = ".$anvil->Database->quote($host_name)." AND a.scan_ipmitool_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)." 
;"; @@ -1867,23 +1869,23 @@ AND my $scan_ipmitool_sensor_low_warning = $row->[7]; my $scan_ipmitool_value_sensor_value = $row->[8]; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid} = $scan_ipmitool_uuid; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units} = $scan_ipmitool_sensor_units; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status} = $scan_ipmitool_sensor_status; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical} = $scan_ipmitool_sensor_high_critical ? $scan_ipmitool_sensor_high_critical : ""; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning} = $scan_ipmitool_sensor_high_warning ? $scan_ipmitool_sensor_high_warning : ""; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical} = $scan_ipmitool_sensor_low_critical ? $scan_ipmitool_sensor_low_critical : ""; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning} = $scan_ipmitool_sensor_low_warning ? 
$scan_ipmitool_sensor_low_warning : ""; - $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} = $scan_ipmitool_value_sensor_value; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid} = $scan_ipmitool_uuid; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units} = $scan_ipmitool_sensor_units; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status} = $scan_ipmitool_sensor_status; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical} = $scan_ipmitool_sensor_high_critical ? $scan_ipmitool_sensor_high_critical : ""; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning} = $scan_ipmitool_sensor_high_warning ? $scan_ipmitool_sensor_high_warning : ""; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical} = $scan_ipmitool_sensor_low_critical ? $scan_ipmitool_sensor_low_critical : ""; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning} = $scan_ipmitool_sensor_low_warning ? 
$scan_ipmitool_sensor_low_warning : ""; + $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value} = $scan_ipmitool_value_sensor_value; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_uuid" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_units" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_status" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_high_critical" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_high_warning" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_low_critical" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_low_warning" => $anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}, - "sql::${machine}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_value_sensor_value" => 
$anvil->data->{sql}{$machine}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_uuid" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_uuid}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_units" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_units}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_status" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_status}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_high_critical" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_critical}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_high_warning" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_high_warning}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_low_critical" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_critical}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_sensor_low_warning" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_sensor_low_warning}, + "sql::${host_name}::scan_ipmitool_sensor_name::${scan_ipmitool_sensor_name}::scan_ipmitool_value_sensor_value" => $anvil->data->{sql}{$host_name}{scan_ipmitool_sensor_name}{$scan_ipmitool_sensor_name}{scan_ipmitool_value_sensor_value}, }}); } @@ 
-1895,135 +1897,56 @@ sub query_ipmi_targets { my ($anvil) = @_; - foreach my $machine (sort {$a cmp $b} keys %{$anvil->data->{'scan-ipmitool'}{machine}}) + foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{'scan-ipmitool'}{host_name}}) { - my $ipmitool_command = $anvil->data->{'scan-ipmitool'}{machine}{$machine}{ipmitool_command}; - my $ipmi_password = $anvil->data->{'scan-ipmitool'}{machine}{$machine}{ipmi_password}; + my $ipmitool_command = $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmitool_command}; + my $ipmi_password = $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmi_password}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - machine => $machine, + host_name => $host_name, ipmitool_command => $ipmitool_command, ipmi_password => $anvil->Log->is_secure($ipmi_password), }}); - # If there is a password, write it to a temp file. - my $temp_file = ""; - if ($ipmi_password) - { - # Write the password to a temp file. - $temp_file = "/tmp/scan-ipmitool"; - $anvil->Storage->write_file({ - body => $ipmi_password, - secure => 1, - file => $temp_file, - overwrite => 1, - }); - } - # Time the call. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_log_0001", variables => { machine => $machine }}); - - my $read_start_time = time; - - # Call with a timeout in case the call hangs. - my $psu_count = 0; - my $shell_call = $ipmitool_command." sensor list all"; - if ($ipmi_password) - { - $shell_call = $ipmitool_command." -f ".$temp_file." 
sensor list all"; - } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_log_0001", variables => { host_name => $host_name }}); - my ($output, $return_code) = $anvil->System->call({timeout => 30, shell_call => $shell_call}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - output => $output, - return_code => $return_code, - }}); + # This will call, parse and store the information in ($host_name == full host name): + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_value_sensor_value + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_units + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_status + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_high_critical + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_high_warning + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_low_critical + # ipmi::$host_name::scan_ipmitool_sensor_name::$sensor_name::scan_ipmitool_sensor_low_warning + $anvil->System->collect_ipmi_data({ + host_name => $host_name, + ipmitool_command => $ipmitool_command, + ipmi_password => $ipmi_password, + }); - foreach my $line (split/\n/, $output) + # Analyze temperature sensors if this is our own data.
+ if (($host_name eq $anvil->Get->host_name) or ($host_name eq $anvil->Get->short_host_name)) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ">> line" => $line }}); - - # Clean up the output - $line =~ s/^\s+//; - $line =~ s/\s+$//; - $line =~ s/\s+\|/|/g; - $line =~ s/\|\s+/|/g; - - ### TODO: If we determine that the IPMI BMC is hung, set the health to '10' - ### $anvil->data->{'scan-ipmitool'}{health}{new}{'ipmi:bmc_controller'} = 10; - # Catch errors: - if ($line =~ /Activate Session command failed/) + foreach my $sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}}) { - # Failed to connect. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_log_0002", variables => { - machine => $machine, - call => $ipmitool_command, + my $units = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + sensor_name => $sensor_name, + units => $units, }}); + + # If this is a temperature, check to see if it is outside its nominal range and, if + # so, record it into a hash for loading into ScanCore's 'temperature' table. + if ($units eq "C") + { + log_abnormal_temperatures($anvil, $host_name, $sensor_name); + } } - next if $line !~ /\|/; - - # high fail -------------------------------------. - # high critical ---------------------------------. | - # high warning -----------------------------. | | - # low warning -------------------------. | | | - # low critical ---------------------. | | | | - # low fail -----------------. | | | | | - # status -------------. | | | | | | - # units ---------. | | | | | | | - # current value -----. | | | | | | | | - # sensor name -. 
| | | | | | | | | - # Columns: | | | | | | | | | | - # x | x | x | x | x | x | x | x | x | x - my ($sensor_name, - $current_value, - $units, - $status, - $low_fail, - $low_critical, - $low_warning, - $high_warning, - $high_critical, - $high_fail) = split /\|/, $line; - - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - sensor_name => $sensor_name, - current_value => $current_value, - units => $units, - status => $status, - low_fail => $low_fail, - low_critical => $low_critical, - low_warning => $low_warning, - high_warning => $high_warning, - high_critical => $high_critical, - high_fail => $high_fail, - }}); - - next if not $sensor_name; - next if not $status; - next if not $units; - next if $units =~ /discrete/; - - $units = "C" if $units =~ /degrees C/i; - $units = "F" if $units =~ /degrees F/i; - $units = "%" if $units =~ /percent/i; - $units = "W" if $units =~ /watt/i; - $units = "V" if $units =~ /volt/i; - - # The BBU and RAID Controller, as reported by IPMI, is flaky and redundant. We - # monitor it via storcli/hpacucli (or OEM variant of), so we ignore it here. - next if $sensor_name eq "BBU"; - next if $sensor_name eq "RAID Controller"; - - # HP seems to stick 'XX-' in front of some sensor names. - $sensor_name =~ s/^\d\d-//; - - # Single PSU hosts often call their PSU just that, without a suffix integer. We'll - # add '1' in such cases. 
- if ($sensor_name eq "PSU") - { - $sensor_name = "PSU1"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); - } + } + + my $psu_count = 0; + foreach my $sensor_name (sort {$a cmp $b} keys %{$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}}) + { if ($sensor_name =~ /^PSU(\d+)/) { my $this_psu = $1; @@ -2034,121 +1957,8 @@ sub query_ipmi_targets $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { psu_count => $psu_count }}); } } - - # Thresholds that are 'na' need to be converted to numeric - $low_fail = -99 if $low_fail eq "na"; - $low_critical = -99 if $low_critical eq "na"; - $low_warning = -99 if $low_warning eq "na"; - $high_warning = 999 if $high_warning eq "na"; - $high_critical = 999 if $high_critical eq "na"; - $high_fail = 999 if $high_fail eq "na"; - - # Values in the DB that are 'double precision' must be '' if not set. - $current_value = '' if not $current_value; - $low_fail = '' if not $low_fail; - $low_critical = '' if not $low_critical; - $low_warning = '' if not $low_warning; - $high_warning = '' if not $high_warning; - $high_critical = '' if not $high_critical; - $high_fail = '' if not $high_fail; - - # Some values list 'inf' on some machines (HP...). Convert these to ''. 
- $current_value = '' if $current_value eq "inf"; - $low_fail = '' if $low_fail eq "inf"; - $low_critical = '' if $low_critical eq "inf"; - $low_warning = '' if $low_warning eq "inf"; - $high_warning = '' if $high_warning eq "inf"; - $high_critical = '' if $high_critical eq "inf"; - $high_fail = '' if $high_fail eq "inf"; - - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - sensor_name => $sensor_name, - current_value => $current_value, - units => $units, - status => $status, - low_fail => $low_fail, - low_critical => $low_critical, - low_warning => $low_warning, - high_warning => $high_warning, - high_critical => $high_critical, - high_fail => $high_fail, - }}); - - if ($units eq "F") - { - # Convert to 'C' - $high_critical = $anvil->Convert->fahrenheit_to_celsius({temperature => $high_critical}) if $high_critical ne ""; - $high_warning = $anvil->Convert->fahrenheit_to_celsius({temperature => $high_warning}) if $high_warning ne ""; - $low_critical = $anvil->Convert->fahrenheit_to_celsius({temperature => $low_critical}) if $low_critical ne ""; - $low_warning = $anvil->Convert->fahrenheit_to_celsius({temperature => $low_warning}) if $low_warning ne ""; - $units = "C"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - low_critical => $low_critical, - low_warning => $low_warning, - high_warning => $high_warning, - high_critical => $high_critical, - units => $units, - }}); - } - - ### TODO: It looks like the PSU state and the PSU temperature are called, simply, - ### 'PSUx'... If so, change the temperature to 'PSUx Temperature' - if (($units eq "C") && ($sensor_name =~ /^PSU\d/i)) - { - $sensor_name .= " Temperature"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); - } - - # Similarly, 'PSUx Power' is used for power status and wattage.... 
- if (($units eq "W") && ($sensor_name =~ /PSU\d Power/i)) - { - $sensor_name =~ s/Power/Wattage/; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); - } - - # And again, 'FAN PSUx' is used for both RPM and state... - if (($units eq "RPM") && ($sensor_name =~ /^FAN PSU\d/i)) - { - $sensor_name .= " RPM"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sensor_name => $sensor_name }}); - } - - # Record - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value} = $current_value; - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units} = $units; - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status} = $status; - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_critical} = $high_critical; - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_warning} = $high_warning; - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_critical} = $low_critical; - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_warning} = $low_warning; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_value_sensor_value" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_value_sensor_value}, - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_units" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_units}, - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_status" => 
$anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_status}, - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_high_critical" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_critical}, - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_high_warning" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_high_warning}, - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_low_critical" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_critical}, - "ipmi::${machine}::scan_ipmitool_sensor_name::${sensor_name}::scan_ipmitool_sensor_low_warning" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$sensor_name}{scan_ipmitool_sensor_low_warning}, - }}); - - # If this is a temperature, check to see if it is outside its nominal range and, if - # so, record it into a hash for loading into ScanCore's 'temperature' table. - if ($units eq "C") - { - log_abnormal_temperatures($anvil, $machine, $sensor_name); - } } - # Delete the temp file. - #unlink $temp_file; - - # Record how long it took. - my $sensor_read_time = $anvil->Convert->time({'time' => (time - $read_start_time)}); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_ipmitool_log_0003", variables => { - machine => $machine, - 'time' => $sensor_read_time - }}); - # If a PSU is OK, but its wattage is 0, input power was lost. We'll switch the PSU state to # ensure this sets a health value. We'll check for five PSUs, though very very few should # have more than 2. 
@@ -2161,13 +1971,13 @@ sub query_ipmi_targets psu_key => $psu_key, }}); - if (exists $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$psu_key}) + if (exists $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$psu_key}) { # TODO: Is this the key for HP machines, too? my $wattage_key = $psu_key." Power"; - my $psu_value = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_value_sensor_value}; - my $psu_status = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_sensor_status}; - my $psu_wattage = $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$wattage_key}{scan_ipmitool_value_sensor_value}; + my $psu_value = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_value_sensor_value}; + my $psu_status = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_sensor_status}; + my $psu_wattage = $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$wattage_key}{scan_ipmitool_value_sensor_value}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { wattage_key => $wattage_key, psu_value => $psu_value, @@ -2178,9 +1988,9 @@ sub query_ipmi_targets if ((lc($psu_status) eq "ok") && (lc($psu_wattage) eq "no reading")) { # Change the status to 'no signal' - $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_sensor_status} = "ns"; + $anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_sensor_status} = "ns"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "ipmi::${machine}::scan_ipmitool_sensor_name::${psu_key}::scan_ipmitool_sensor_status" => $anvil->data->{ipmi}{$machine}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_sensor_status}, + "ipmi::${host_name}::scan_ipmitool_sensor_name::${psu_key}::scan_ipmitool_sensor_status" => 
$anvil->data->{ipmi}{$host_name}{scan_ipmitool_sensor_name}{$psu_key}{scan_ipmitool_sensor_status}, }}); } } @@ -2223,14 +2033,14 @@ sub find_ipmi_targets { # We're good. $ipmi_targets++; - my $host_name = $anvil->Get->host_name(); - $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{host_ipmi} = ""; - $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmitool_command} = $anvil->data->{path}{exe}{ipmitool}; - $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmi_password} = ""; + my $host_name = $anvil->Get->host_name(); + $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{host_ipmi} = ""; + $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmitool_command} = $anvil->data->{path}{exe}{ipmitool}; + $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmi_password} = ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "scan-ipmitool::machine::${host_name}::host_ipmi" => $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{host_ipmi}, - "scan-ipmitool::machine::${host_name}::ipmitool_command" => $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmitool_command}, - "scan-ipmitool::machine::${host_name}::ipmi_password" => $anvil->Log->is_secure($anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmi_password}), + "scan-ipmitool::host_name::${host_name}::host_ipmi" => $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{host_ipmi}, + "scan-ipmitool::host_name::${host_name}::ipmitool_command" => $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmitool_command}, + "scan-ipmitool::host_name::${host_name}::ipmi_password" => $anvil->Log->is_secure($anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmi_password}), }}); } } @@ -2280,68 +2090,25 @@ AND $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target => $target }}); if ($target) { - ($access) = $anvil->Network->ping({ping => $target}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, 
level => 2, list => { access => $access }}); + ($access, my $average_time) = $anvil->Network->ping({ping => $target}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + access => $access, + average_time => $average_time, + }}); } next if not $access; $ipmi_targets++; # Convert to an 'ipmitool' call. - my $ipmitool_command = $anvil->data->{path}{exe}{ipmitool}; - my $ipmi_password = ""; - if (($host_ipmi =~ /-A (.*?) /) or ($host_ipmi =~ /-auth (.*?) /)) - { - # IPMI Lan Auth type (md5, password, or none) - $ipmitool_command .= " -A ".$1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmitool_command => $ipmitool_command }}); - } - if (($host_ipmi =~ /-a (.*?) /) or ($host_ipmi =~ /-ip (.*?) /)) - { - # IPMI Lan IP to talk to - $ipmitool_command .= " -H ".$1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmitool_command => $ipmitool_command }}); - } - if (($host_ipmi =~ /-P /) or ($host_ipmi =~ /-lanplus /)) - { - # Use Lanplus to improve security of connection - $ipmitool_command .= " -I lanplus"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmitool_command => $ipmitool_command }}); - } - if (($host_ipmi =~ /-l (.*?) /) or ($host_ipmi =~ /-username (.*?) /)) - { - # Username/Login (if required) to control power on IPMI device - $ipmitool_command .= " -U ".$1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmitool_command => $ipmitool_command }}); - } - if (($host_ipmi =~ /-C (.*?) /) or ($host_ipmi =~ /-cipher (.*?) /)) - { - # Ciphersuite to use (same as ipmitool -C parameter) - $ipmitool_command .= " -C ".$1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmitool_command => $ipmitool_command }}); - } - if (($host_ipmi =~ /-L (.*?) /) or ($host_ipmi =~ /-privlvl (.*?) 
/)) - { - # Privilege level on IPMI device - $ipmitool_command .= " -L ".$1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ipmitool_command => $ipmitool_command }}); - } - if (($host_ipmi =~ /-p (.*?) -/) or ($host_ipmi =~ /-password (.*?) -/) or ($host_ipmi =~ /-password '(.*?)'/)) - { - # Password (if required) to control power on IPMI device - $ipmi_password = $1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, secure => 1, level => 2, list => { ">> ipmi_password" => $ipmi_password }}); - $ipmi_password =~ s/^'(.*?)'$/$1/; - $ipmi_password =~ s/\\'/'/g; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, secure => 1, level => 2, list => { "<< ipmi_password" => $ipmi_password }}); - } + my ($ipmitool_command, $ipmi_password) = $anvil->Convert->fence_ipmilan_to_ipmitool({fence_ipmilan_command => $host_ipmi}); - $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{host_ipmi} = $host_ipmi; - $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmitool_command} = $ipmitool_command; - $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmi_password} = $ipmi_password; + $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{host_ipmi} = $host_ipmi; + $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmitool_command} = $ipmitool_command; + $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmi_password} = $ipmi_password; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "scan-ipmitool::machine::${host_name}::host_ipmi" => $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{host_ipmi}, - "scan-ipmitool::machine::${host_name}::ipmitool_command" => $anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmitool_command}, - "scan-ipmitool::machine::${host_name}::ipmi_password" => $anvil->Log->is_secure($anvil->data->{'scan-ipmitool'}{machine}{$host_name}{ipmi_password}), + "scan-ipmitool::host_name::${host_name}::host_ipmi" => 
$anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{host_ipmi}, + "scan-ipmitool::host_name::${host_name}::ipmitool_command" => $anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmitool_command}, + "scan-ipmitool::host_name::${host_name}::ipmi_password" => $anvil->Log->is_secure($anvil->data->{'scan-ipmitool'}{host_name}{$host_name}{ipmi_password}), }}); } diff --git a/scancore-agents/scan-ipmitool/scan-ipmitool.xml b/scancore-agents/scan-ipmitool/scan-ipmitool.xml index 459db991..9f512bdd 100644 --- a/scancore-agents/scan-ipmitool/scan-ipmitool.xml +++ b/scancore-agents/scan-ipmitool/scan-ipmitool.xml @@ -15,8 +15,8 @@ NOTE: All string keys MUST be prefixed with the agent name! ie: 'scan_ipmitool_l No IPMI BMC found on this host nor where any other machines with IPMI found or where accessible. Nothing to do. - There was no IPMI sensor value units set for sensor: [#!variable!sensor!#] on the machine: [#!variable!machine!#]. - There was no IPMI sensor value set for sensor: [#!variable!sensor!#] on the machine: [#!variable!machine!#]. + There was no IPMI sensor value units set for sensor: [#!variable!sensor!#] on the machine: [#!variable!host_name!#]. + There was no IPMI sensor value set for sensor: [#!variable!sensor!#] on the machine: [#!variable!host_name!#]. The sensor: [#!variable!sensor_name!#] has changed. - [#!variable!old_sensor_value!#] -> [#!variable!new_sensor_value!#] @@ -76,7 +76,7 @@ The temperature sensor: [#!variable!sensor_name!#] has risen blow critically hig - [#!variable!old_sensor_value!#] -> [#!variable!new_sensor_value!#] Note: If you are listening to 'critical' level alerts only, you will not get the alert telling you when the temperature is back to normal. - There was no IPMI sensor value units set for sensor: [#!variable!sensor!#] on the machine: [#!variable!machine!#]. + There was no IPMI sensor value units set for sensor: [#!variable!sensor!#] on the machine: [#!variable!host_name!#]. 
The sensor: [#!variable!sensor_name!#] has changed. - [#!variable!old_sensor_value!#] -> [#!variable!new_sensor_value!#] @@ -87,10 +87,10 @@ The sensor: [#!variable!sensor_name!#] has changed. - Low warning: [#!variable!old_low_warning!#] -> [#!variable!new_low_warning!#] - Low critical: [#!variable!old_low_critical!#] -> [#!variable!new_low_critical!#] - There was no IPMI sensor value units set for sensor: [#!variable!sensor!#] on the machine: [#!variable!machine!#]. - There was no IPMI sensor value set for sensor: [#!variable!sensor!#] on the machine: [#!variable!machine!#]. + There was no IPMI sensor value units set for sensor: [#!variable!sensor!#] on the machine: [#!variable!host_name!#]. + There was no IPMI sensor value set for sensor: [#!variable!sensor!#] on the machine: [#!variable!host_name!#]. -The new sensor: [#!variable!sensor_name!#] has been found on the machine: [#!variable!machine!#]. +The new sensor: [#!variable!sensor_name!#] has been found on the machine: [#!variable!host_name!#]. - Value: [#!variable!sensor_value!#], Status: [#!variable!sensor_status!#] - Thresholds: - High critical: [#!variable!high_critical!#] @@ -99,7 +99,7 @@ The new sensor: [#!variable!sensor_name!#] has been found on the machine: [#!var - Low critical: [#!variable!low_critical!#] -The new sensor: [#!variable!sensor_name!#] has been found on the machine: [#!variable!machine!#]. +The new sensor: [#!variable!sensor_name!#] has been found on the machine: [#!variable!host_name!#]. Warning: It is not in an OK state! - Value: [#!variable!sensor_value!#], Status: [#!variable!sensor_status!#] - Thresholds: @@ -110,9 +110,9 @@ The new sensor: [#!variable!sensor_name!#] has been found on the machine: [#!var - Starting to read the IPMI sensor values for: [#!variable!machine!#] - Failed to query node: [#!variable!machine!#]'s IPMI interface using the call: [#!variable!call!#]. Is the password correct? - IPMI sensor values read from: [#!variable!machine!#] in: [#!variable!time!#]. 
+ Starting to read the IPMI sensor values for: [#!variable!host_name!#] + Failed to query node: [#!variable!host_name!#]'s IPMI interface using the call: [#!variable!call!#]. Is the password correct? + IPMI sensor values read from: [#!variable!host_name!#] in: [#!variable!time!#]. The sensor named: [#!variable!sensor_name!#] appears to have vanished, but this is the first scan that it vanished. This is generally harmless and just a sensor read issue. The sensor named: [#!variable!sensor_name!#] has returned. diff --git a/share/words.xml b/share/words.xml index dc36699f..d58f5e38 100644 --- a/share/words.xml +++ b/share/words.xml @@ -240,6 +240,7 @@ The error was: The temperature: [#!variable!temperature!#] does not appear to be valid.. The resource: [#!variable!resource!#] in the config file: [#!variable!file!#] was found, but does not appear to be a valid UUID: [#!variable!uuid!#]. The resource: [#!variable!resource!#] in the config file: [#!variable!file!#] was found, and we were asked to replace the 'scan_drbd_resource_uuid' but the new UUID: [#!variable!uuid!#] is not a valud UUID. + The 'fence_ipmilan' command: [#!variable!command!#] does not appear to be valid. Current Network Interfaces and States @@ -1071,6 +1072,24 @@ The file: [#!variable!file!#] needs to be updated. The difference is: ==== Scan agent: [#!variable!agent_name!#] exited after: [#!variable!runtime!#] seconds with the return code: [#!variable!return_code!#]. + I'm not on the same network as: [#!variable!host_name!#]. Unable to check the power state. + The host: [#!variable!host_name!#] appears to be off, but there's no IPMI information, so unable to check the power state or power on the machine. + The host: [#!variable!host_name!#] has no IPMI information. Wouldn't be able to boot it, even if it's off, so skipping it. + The host: [#!variable!host_name!#] will be checked to see if it needs to be booted or not. + The host: [#!variable!host_name!#] is up, no need to check if it needs booting. 
+ The host: [#!variable!host_name!#] couldn't be reached directly, but IPMI reports that it is up. Could the IPMI BMC be hung or unplugged? + The host: [#!variable!host_name!#] is off. Will check now if it should be booted. + The host: [#!variable!host_name!#] has no stop reason, so we'll leave it off. + The host: [#!variable!host_name!#] was stopped by the user, so we'll leave it off. + The host: [#!variable!host_name!#] was powered off because of power loss. Checking to see if it is now safe to restart it. + The host: [#!variable!host_name!#] was powered off because of thermal issues. Checking to see if it is now safe to restart it. + Unable to find an install manifest for the Anvil! [#!variable!anvil_name!#]. As such, unable to determine what UPSes power the machine: [#!variable!host_name!#]. Unable to determine if the power feeding this node is OK or not. + Unable to parse the install manifest uuid: [#!variable!manifest_uuid!#] for the Anvil! [#!variable!anvil_name!#]. As such, unable to determine what UPSes power the machine: [#!variable!host_name!#]. Unable to determine if the power feeding this node is OK or not. + The UPS referenced by the 'power_uuid': [#!variable!power_uuid!#] under the host: [#!variable!host_name!#] has no record of being on mains power, so we can't determine how long it's been on batteries. Setting the "shortest time on batteries" to zero seconds. + Clearing the host's stop reason. + The host: [#!variable!host_name!#] is off, but there appears to be a problem translating the 'fence_ipmilan' into a workable 'ipmitool' command. Unable to check the thermal data of the host, and so, unable to determine if it's safe to boot the node. + The host: [#!variable!host_name!#] was powered off because of power loss. Power is back and the UPSes are sufficiently charged. Booting it back up now. + The host: [#!variable!host_name!#] was powered off for thermal reasons. All available thermal sensors read as OK now. Booting it back up now. 
The host name: [#!variable!target!#] does not resolve to an IP address. @@ -1662,6 +1681,7 @@ If you are comfortable that the target has changed for a known reason, you can s This tracks the last time a given mail server was configured for use. It allows for a round-robin switching of mail servers when one mail server stops working and two or more mail servers have been configured. No UPSes This is a condition record, used by programs like scan agents to track how long a condition has existed for. + This indicated why a machine was powered off. This is used by ScanCore to decide if or when to power up the target host. #!variable!number!#/sec diff --git a/tools/scancore b/tools/scancore index b82cbba8..dc97d32f 100755 --- a/tools/scancore +++ b/tools/scancore @@ -13,6 +13,9 @@ # - Decide if it's worth having a separate ScanCore.log file or just feed into anvil.log. # - Examine limits in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU= # - Use 'nvme-cli' to write a scan-nvme scan agent, can get thermal and wear data +# - Record how long a server's migration took in the past, and use that to determine which node to evacuate +# during load shed. Also, track how long it takes for servers to stop to determine when to initiate a total +# shutdown. # - use strict; @@ -50,6 +53,9 @@ $anvil->data->{scancore} = { warning_temperature => 5, warning_critical => 5, }, + power => { + safe_boot_percentage => 35, + }, }; $anvil->Storage->read_config(); @@ -68,6 +74,9 @@ wait_for_database($anvil); # If we're not configured, sleep. wait_until_configured($anvil); +# Startup tasks. +startup_tasks($anvil); + # Load the strings from all the agents we know about before we process alerts so that we can include their # messages in any emails we're going to send. 
load_agent_strings($anvil); @@ -92,7 +101,10 @@ while(1) if ($anvil->data->{sys}{database}{connections}) { # Run the normal tasks - call_agents($anvil); + $anvil->ScanCore->call_scan_agents({debug => 2}); # Agent discovery and invocation now live in Anvil::Tools::ScanCore; the local call_agents() sub is removed below. + + # Do post-scan analysis. + $anvil->ScanCore->post_scan_analysis({debug => 2}); } else { @@ -141,91 +153,6 @@ $anvil->nice_exit({exit_code => 0}); # Functions # ############################################################################################################# -# This invokes all scan agents found in 'path::directories::scan_agents' -sub call_agents -{ - my ($anvil) = @_; - - # Get the current list of scan agents on this system. - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "path::directories::scan_agents" => $anvil->data->{path}{directories}{scan_agents}, - }}); - scan_directory($anvil, $anvil->data->{path}{directories}{scan_agents}); - - # Now loop through the agents I found and call them. - my $timeout = 30; - if ((exists $anvil->data->{scancore}{timing}{agent_runtime}) && ($anvil->data->{scancore}{timing}{agent_runtime} =~ /^\d+$/)) - { - $timeout = $anvil->data->{scancore}{timing}{agent_runtime}; - } - foreach my $agent_name (sort {$a cmp $b} keys %{$anvil->data->{scancore}{agent}}) - { - my $agent_path = $anvil->data->{scancore}{agent}{$agent_name}; - my $agent_words = $agent_path.".xml"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - agent_name => $agent_name, - agent_path => $agent_path, - agent_words => $agent_words, - }}); - - if ((-e $agent_words) && (-r $agent_words)) - { - # Read the words file so that we can generate alerts later. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0251", variables => { - agent_name => $agent_name, - file => $agent_words, - }}); - $anvil->Words->read({file => $agent_words}); - } - - # Set the timeout. 
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - agent_name => $agent_name, - "scancore::${agent_name}::timeout" => $anvil->data->{scancore}{$agent_name}{timeout}, - }}); - - # Now call the agent. - my $start_time = time; - if (($anvil->data->{scancore}{$agent_name}{timeout}) && ($anvil->data->{scancore}{$agent_name}{timeout} =~ /^\d+$/)) - { - $timeout = $anvil->data->{scancore}{$agent_name}{timeout}; - } - my $shell_call = $agent_path; - if ($anvil->data->{sys}{'log'}{level}) - { - $shell_call .= " ".$anvil->data->{sys}{'log'}{level}; - } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); - - # Tell the user this agent is about to run... - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0252", variables => { - agent_name => $agent_name, - timeout => $timeout, - }}); - my ($output, $return_code) = $anvil->System->call({timeout => $timeout, shell_call => $shell_call}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code }}); - foreach my $line (split/\n/, $output) - { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); - } - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0557", variables => { - agent_name => $agent_name, - runtime => (time - $start_time), - return_code => $return_code, - }}); - - # If the return code is '124', timeout popped. - if ($return_code eq "124") - { - ### TODO: Check if this alert was set so it only goes out once. - # Register an alert... - $anvil->Alert->register({set_by => $THIS_FILE, alert_level => "notice", message => "message_0180,!!agent_name!".$agent_name."!!,!!timeout!".$timeout."!!"}); - } - } - - return(0); -} - # This cleans things up after a scan run has completed. 
sub cleanup_after_run { @@ -309,47 +236,6 @@ sub prepare_for_run return(0); } -# This looks in the passed-in directory for scan agents or sub-directories (which will in turn be scanned). -sub scan_directory -{ - my ($anvil, $directory) = @_; - - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { directory => $directory }}); - local(*DIRECTORY); - opendir(DIRECTORY, $directory); - while(my $file = readdir(DIRECTORY)) - { - next if $file eq "."; - next if $file eq ".."; - my $full_path = $directory."/".$file; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - file => $file, - full_path => $full_path, - }}); - # If we're looking at a directory, scan it. Otherwise, see if it's an executable and that it - # starts with 'scan-*'. - if (-d $full_path) - { - # This is a directory, dive into it. - scan_directory($anvil, $full_path); - } - elsif (-x $full_path) - { - # Now I only want to know if the file starts with 'scan-' - next if $file !~ /^scan-/; - - # If I am still alive, I am looking at a scan agent! - $anvil->data->{scancore}{agent}{$file} = $full_path; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "scancore::agent::${file}" => $anvil->data->{scancore}{agent}{$file}, - }}); - } - } - closedir(DIRECTORY); - - return(0); -} - # This loops until it can connect to at least one database. sub wait_for_database { @@ -428,6 +314,28 @@ sub wait_until_configured return(0); } +# Things we need to do at startup. +sub startup_tasks +{ + my ($anvil) = @_; + + # Make sure our stop reason is cleared. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0572"}); + my $variable_uuid = $anvil->Database->insert_or_update_variables({ + debug => 2, + variable_name => 'system::stop_reason', + variable_value => '', + variable_default => '', + variable_description => 'striker_0279', + variable_section => 'system', + variable_source_uuid => $anvil->Get->host_uuid(), # This host's own UUID (not a fixed machine), so each host clears its own stop reason at startup. + variable_source_table => 'hosts', + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); + + return(0); +} + =pod "I'm sorry, but I don't want to be an emperor. That's not my business. I don't want to rule or conquer anyone. I should like to help everyone if possible - Jew, Gentile - black man - white. diff --git a/tools/striker-manage-install-target b/tools/striker-manage-install-target index cd54416b..8a555e5f 100755 --- a/tools/striker-manage-install-target +++ b/tools/striker-manage-install-target @@ -1251,14 +1251,15 @@ ORDER BY { foreach my $interface (sort {$a cmp $b} keys %{$match->{$short_host_name}}) { - my $remote_ip = $match->{$short_host_name}{$interface}{ip}; - my $pinged = $anvil->Network->ping({ + my $remote_ip = $match->{$short_host_name}{$interface}{ip}; + my ($pinged, $average_time) = $anvil->Network->ping({ ping => $remote_ip, count => 1, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - remote_ip => $remote_ip, - pinged => $pinged, + remote_ip => $remote_ip, + pinged => $pinged, + average_time => $average_time, }}); if ($pinged) { @@ -1473,6 +1474,7 @@ sub load_packages "adwaita-icon-theme.noarch", "alsa-lib.x86_64", "alteeve-el8-repo.noarch", + "annobin.x86_64", "anvil-core.noarch", "anvil-dr.noarch", "anvil-node.noarch", @@ -1562,6 +1564,7 @@ sub load_packages "cronie.x86_64", "crontabs.noarch", "crypto-policies.noarch", + "crypto-policies-scripts.noarch", "cryptsetup-libs.x86_64", "cups-libs.x86_64", 
"cups-pk-helper.x86_64", @@ -1611,13 +1614,17 @@ sub load_packages "dracut.x86_64", "drbd90-utils.x86_64", "drpm.x86_64", + "dwz.x86_64", ], e => [ "e2fsprogs-libs.x86_64", "e2fsprogs.x86_64", "edk2-ovmf.noarch", + "efi-srpm-macros.noarch", + "elfutils.x86_64", "elfutils-default-yama-scope.noarch", "elfutils-libelf.x86_64", + "elfutils-libelf-devel.x86_64", "elfutils-libs.x86_64", "emacs-filesystem.noarch", "enchant.x86_64", @@ -1689,8 +1696,10 @@ sub load_packages g => [ "gawk.x86_64", "GConf2.x86_64", + "gc.x86_64", "gcc.x86_64", "gcr.x86_64", + "gdb-headless.x86_64", "gdbm-libs.x86_64", "gdbm.x86_64", "gdisk.x86_64", @@ -1705,6 +1714,7 @@ sub load_packages "geolite2-country.noarch", "gettext-libs.x86_64", "gettext.x86_64", + "ghc-srpm-macros.noarch", "gjs.x86_64", "glib-networking.x86_64", "glib2.x86_64", @@ -1741,6 +1751,7 @@ sub load_packages "gnutls-dane.x86_64", "gnutls-utils.x86_64", "gnutls.x86_64", + "go-srpm-macros.noarch", "gobject-introspection.x86_64", "gpgme.x86_64", "gpm-libs.x86_64", @@ -1766,6 +1777,7 @@ sub load_packages "gtk-vnc2.x86_64", "gtk2.x86_64", "gtk3.x86_64", + "guile.x86_64", "gvfs.x86_64", "gvfs-client.x86_64", "gvnc.x86_64", @@ -1862,7 +1874,6 @@ sub load_packages "krb5-libs.x86_64", ], l => [ - "langpacks-en.noarch", "lcms2.x86_64", "less.x86_64", @@ -1897,7 +1908,9 @@ sub load_packages "libassuan.x86_64", "libasyncns.x86_64", "libatasmart.x86_64", + "libatomic_ops.x86_64", "libattr.x86_64", + "libbabeltrace.x86_64", "libbasicobjects.x86_64", "libblkid.x86_64", "libbytesize.x86_64", @@ -1963,6 +1976,7 @@ sub load_packages "libimobiledevice.x86_64", "libini_config.x86_64", "libinput.x86_64", + "libipt.x86_64", "libiscsi.x86_64", "libjpeg-turbo.x86_64", "libkcapi-hmaccalc.x86_64", @@ -2129,6 +2143,7 @@ sub load_packages 'm' => [ "mailcap.noarch", "mailx.x86_64", + "make.x86_64", "man-db.x86_64", "mcpp.x86_64", "mdadm.x86_64", @@ -2173,16 +2188,18 @@ sub load_packages "nmap.x86_64", "npth.x86_64", "nspr.x86_64", + "nss.x86_64", 
"nss-softokn-freebl.x86_64", "nss-softokn.x86_64", "nss-sysinit.x86_64", "nss-util.x86_64", - "nss.x86_64", "numactl-libs.x86_64", "numad.x86_64", "nvme-cli.x86_64", ], o => [ + "ocaml-srpm-macros.noarch", + "openblas-srpm-macros.noarch", "openldap.x86_64", "openssh-clients.x86_64", "openssh-server.x86_64", @@ -2207,6 +2224,7 @@ sub load_packages "pango.x86_64", "parted.x86_64", "passwd.x86_64", + "patch.x86_64", "pciutils-libs.x86_64", "pciutils.x86_64", "pcre.x86_64", @@ -2214,6 +2232,7 @@ sub load_packages "perl-aliased.noarch", "perl-Algorithm-C3.noarch", "perl-Algorithm-Diff.noarch", + "perl-Authen-SASL.noarch", "perl-B-Hooks-EndOfScope.noarch", "perl-CGI.noarch", "perl-Capture-Tiny.noarch", @@ -2223,6 +2242,7 @@ sub load_packages "perl-Class-Method-Modifiers.noarch", "perl-Compress-Raw-Bzip2.x86_64", "perl-Compress-Raw-Zlib.x86_64", + "perl-Convert-ASN1.noarch", "perl-Curses.x86_64", "perl-Curses-UI.noarch", "perl-DBD-Pg.x86_64", @@ -2266,6 +2286,7 @@ sub load_packages "perl-Filter-Simple.noarch", "perl-Future.noarch", "perl-Getopt-Long.noarch", + "perl-GSSAPI.x86_64", "perl-HTML-FromText.noarch", "perl-HTML-Parser.x86_64", "perl-HTML-Strip.x86_64", @@ -2285,6 +2306,7 @@ sub load_packages "perl-IPC-SysV.x86_64", "perl-JSON.noarch", "perl-JSON-PP.noarch", + "perl-LDAP.noarch", "perl-LWP-MediaTypes.noarch", "perl-Log-Contextual.noarch", "perl-Log-Dispatch-FileRotate.noarch", @@ -2337,6 +2359,7 @@ sub load_packages "perl-Socket.x86_64", "perl-Socket6.x86_64", "perl-Specio.noarch", + "perl-srpm-macros.noarch", "perl-Storable.x86_64", "perl-strictures.noarch", "perl-String-ShellQuote.noarch", @@ -2354,7 +2377,9 @@ sub load_packages "perl-Test-Simple.noarch", "perl-Text-Diff.noarch", "perl-Text-ParseWords.noarch", + "perl-Text-Soundex.x86_64", "perl-Text-Tabs+Wrap.noarch", + "perl-Text-Unidecode.noarch", "perl-Time-HiRes.x86_64", "perl-Time-Local.noarch", "perl-TimeDate.noarch", @@ -2418,7 +2443,7 @@ sub load_packages "pulseaudio-libs.x86_64", 
"pulseaudio-module-bluetooth.x86_64", "pulseaudio.x86_64", - #"python3-IPy.noarch", + "python-srpm-macros.noarch", "python3-asn1crypto.noarch", "python3-audit.x86_64", "python3-argcomplete.noarch", @@ -2468,8 +2493,10 @@ sub load_packages "python3-pyparsing.noarch", "python3-pysocks.noarch", "python3-pyudev.noarch", + "python3-pyyaml.x86_64", "python3-requests.noarch", "python3-rpm.x86_64", + "python3-rpm-macros.noarch", "python3-schedutils.x86_64", "python3-setools.x86_64", "python3-setuptools.noarch", @@ -2498,6 +2525,7 @@ sub load_packages "qemu-kvm-common.x86_64", "qemu-kvm-core.x86_64", "qemu-kvm.x86_64", + "qt5-srpm-macros.noarch", "quota-nls.noarch", "quota.x86_64", ], @@ -2505,10 +2533,13 @@ sub load_packages "radvd.x86_64", "rdma-core.x86_64", "readline.x86_64", + "redhat-rpm-config.noarch", "rest.x86_64", "rootfiles.noarch", "rpcbind.x86_64", + "rpm-build.x86_64", "rpm-build-libs.x86_64", + "rpmdevtools.noarch", "rpm-libs.x86_64", "rpm-plugin-selinux.x86_64", "rpm-plugin-systemd-inhibit.x86_64", @@ -2527,6 +2558,7 @@ sub load_packages "rubygem-psych.x86_64", "rubygem-rdoc.noarch", "rubygems.noarch", + "rust-srpm-macros.noarch", ], 's' => [ "samba-client-libs.x86_64", @@ -2622,8 +2654,6 @@ sub load_packages w => [ "webkit2gtk3.x86_64", "webkit2gtk3-jsc.x86_64", - "webkit2gtk3-plugin-process-gtk2.x86_64", - "webkit2gtk3.x86_64", "webrtc-audio-processing.x86_64", "wget.x86_64", "which.x86_64", @@ -2654,9 +2684,11 @@ sub load_packages ], z => [ "zlib.x86_64", + "zlib-devel.x86_64", + "zstd.x86_64", ], }; - + # These packages can't be downloaded on RHEL Striker dashboads as they usually are not entitled to $anvil->data->{ha_packages} = { c => [ @@ -2692,6 +2724,7 @@ sub load_packages "resource-agents.x86_64", ], }; + my ($os_type, $os_arch) = $anvil->Get->os_type(); if ($os_type eq "rhel8") @@ -2709,8 +2742,7 @@ sub load_packages push @{$anvil->data->{packages}{c}}, "centos-indexhtml.noarch"; push @{$anvil->data->{packages}{c}}, "centos-logos-httpd.noarch"; 
push @{$anvil->data->{packages}{c}}, "centos-logos.x86_64"; - push @{$anvil->data->{packages}{c}}, "centos-release.x86_64"; - push @{$anvil->data->{packages}{c}}, "centos-repos.x86_64"; + push @{$anvil->data->{packages}{c}}, "centos-linux-release.noarch"; # While we're here, we will need to rename /var/www/html/rhel8 to /var/www/html/rhel8, as # 'centos8' and '/var/lib/tftpboot/rhel8' as 'centos8', as is used by anvil-striker-extra. diff --git a/tools/test.pl b/tools/test.pl index 448dd0aa..cfd1d8ec 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -23,16 +23,39 @@ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => " $anvil->Get->switches; # Connect to the database(s). -#$anvil->Database->connect; -#$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); +$anvil->Database->connect; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); -my $hours = 0; -my $minutes = 3; -my $seconds = 24; +if (0) +{ + foreach my $uuid ("4c4c4544-0043-4210-8043-c3c04f523533", "4c4c4544-0043-4210-8042-c3c04f523533", "30343536-3138-5355-4534-3238324b4842", "b4e46faf-0ebe-e211-a0d6-00262d0ca874", "4ba42b4e-9bf7-e311-a889-899427029de4") + { + my $variable_uuid = $anvil->Database->insert_or_update_variables({ + debug => 2, + variable_name => 'system::stop_reason', + variable_value => 'thermal', + variable_default => '', + variable_description => 'striker_0279', + variable_section => 'system', + variable_source_uuid => $uuid, + variable_source_table => 'hosts', + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); + } +} -print "Hours: [".$hours."], minutes: [".$minutes."], seconds: [".$seconds."]\n"; +if (1) +{ + $anvil->ScanCore->post_scan_analysis({debug => 3}); +} -my $estimated_time_to_sync = (($hours * 3600) + ($minutes * 60) + $seconds); -print "ETA: [".$estimated_time_to_sync."] 
(".$anvil->Convert->time({'time' => $estimated_time_to_sync}).")\n"; +if (0) +{ + my $problem = $anvil->Striker->load_manifest({ + debug => 2, + manifest_uuid => "006ee2cb-1fbd-4ea6-89d6-96cf3bc94940", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); +} $anvil->nice_exit({exit_code => 0});