* Created the ScanCore.pm module with the first 'agent_startup' method which generalized scan agent start up.

* Updated Alert->register to take a hash reference for message variables to simplify when a caller plans to log and register an alert at the same time.
* Updated Convert->bytes_to_human_readable() to name the 'size' variable used internally for 'bytes' to actually be 'bytes' for better consistency.
* Created multiple new Database methods;
** ->check_condition_age() is meant to be used by scan agents to see how long a given condition has been in play (ie: how long ago power was lost to a UPS or a sensor became unreadable).
** ->insert_or_update_health() handles recording data to the new 'health' table, used for determining ideal hosts for servers between nodes.
** ->insert_or_update_power() handles recording data to the new 'power' table, used for determining how power events are handled.
** ->insert_or_update_temperature() handles recording temperature data to the new 'temperature' table, used to determine how thermal events are handled.
* Got a lot more done on the scan-hardware scan agent. Only part left now is post-scan health processing.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent 28ac266024
commit 0a1dc809a2
  1. 21
      Anvil/Tools.pm
  2. 51
      Anvil/Tools/Alert.pm
  3. 26
      Anvil/Tools/Convert.pm
  4. 1226
      Anvil/Tools/Database.pm
  5. 177
      Anvil/Tools/ScanCore.pm
  6. 188
      notes
  7. 1
      rpm/SPECS/anvil.spec
  8. 1551
      scancore-agents/scan-hardware/scan-hardware
  9. 247
      scancore-agents/scan-hardware/scan-hardware.sql
  10. 141
      scancore-agents/scan-hardware/scan-hardware.xml
  11. 186
      share/anvil.sql
  12. 5
      share/words.xml
  13. 7
      tools/scancore
  14. 15
      tools/test.pl

@ -51,6 +51,7 @@ use Anvil::Tools::Job;
use Anvil::Tools::Log; use Anvil::Tools::Log;
use Anvil::Tools::Network; use Anvil::Tools::Network;
use Anvil::Tools::Remote; use Anvil::Tools::Remote;
use Anvil::Tools::ScanCore;
use Anvil::Tools::Server; use Anvil::Tools::Server;
use Anvil::Tools::Striker; use Anvil::Tools::Striker;
use Anvil::Tools::Storage; use Anvil::Tools::Storage;
@ -134,6 +135,7 @@ sub new
JOB => Anvil::Tools::Job->new(), JOB => Anvil::Tools::Job->new(),
NETWORK => Anvil::Tools::Network->new(), NETWORK => Anvil::Tools::Network->new(),
REMOTE => Anvil::Tools::Remote->new(), REMOTE => Anvil::Tools::Remote->new(),
SCANCORE => Anvil::Tools::ScanCore->new(),
SERVER => Anvil::Tools::Server->new(), SERVER => Anvil::Tools::Server->new(),
STRIKER => Anvil::Tools::Striker->new(), STRIKER => Anvil::Tools::Striker->new(),
STORAGE => Anvil::Tools::Storage->new(), STORAGE => Anvil::Tools::Storage->new(),
@ -177,6 +179,7 @@ sub new
$anvil->Job->parent($anvil); $anvil->Job->parent($anvil);
$anvil->Network->parent($anvil); $anvil->Network->parent($anvil);
$anvil->Remote->parent($anvil); $anvil->Remote->parent($anvil);
$anvil->ScanCore->parent($anvil);
$anvil->Server->parent($anvil); $anvil->Server->parent($anvil);
$anvil->Striker->parent($anvil); $anvil->Striker->parent($anvil);
$anvil->Storage->parent($anvil); $anvil->Storage->parent($anvil);
@ -592,6 +595,18 @@ sub Remote
return ($self->{HANDLE}{REMOTE}); return ($self->{HANDLE}{REMOTE});
} }
=head2 ScanCore

Access the C<ScanCore.pm> methods via 'C<< $anvil->ScanCore->method >>'.

=cut
sub ScanCore
{
	# Accessor; hands back the ScanCore module handle stored on this object under HANDLE -> SCANCORE.
	my ($self) = @_;
	
	my $handle = $self->{HANDLE}{SCANCORE};
	return ($handle);
}
=head2 Server =head2 Server
Access the C<Server.pm> methods via 'C<< $anvil->Server->method >>'. Access the C<Server.pm> methods via 'C<< $anvil->Server->method >>'.
@ -897,6 +912,9 @@ sub _set_defaults
# grep 'CREATE TABLE' share/anvil.sql | grep -v history. | awk '{print $3}' # grep 'CREATE TABLE' share/anvil.sql | grep -v history. | awk '{print $3}'
core_tables => [ core_tables => [
"hosts", # Always has to be first. "hosts", # Always has to be first.
"health",
"power",
"temperature",
"ssh_keys", "ssh_keys",
"users", "users",
"host_variable", "host_variable",
@ -1184,6 +1202,7 @@ sub _set_paths
ifdown => "/sbin/ifdown", ifdown => "/sbin/ifdown",
ifup => "/sbin/ifup", ifup => "/sbin/ifup",
ip => "/usr/sbin/ip", ip => "/usr/sbin/ip",
'ipmi-oem' => "/usr/sbin/ipmi-oem",
ipmitool => "/usr/bin/ipmitool", ipmitool => "/usr/bin/ipmitool",
'iptables-save' => "/usr/sbin/iptables-save", 'iptables-save' => "/usr/sbin/iptables-save",
journalctl => "/usr/bin/journalctl", journalctl => "/usr/bin/journalctl",
@ -1257,6 +1276,8 @@ sub _set_paths
alert => "/var/log/anvil.alert.log", alert => "/var/log/anvil.alert.log",
}, },
proc => { proc => {
cpuinfo => "/proc/cpuinfo",
meminfo => "/proc/meminfo",
uptime => "/proc/uptime", uptime => "/proc/uptime",
}, },
secure => { secure => {

@ -166,7 +166,7 @@ SELECT
FROM FROM
alert_sent alert_sent
WHERE WHERE
alert_sent_host_uuid = ".$anvil->Database->quote($anvil->data->{sys}{host_uuid})." alert_sent_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
AND AND
alert_set_by = ".$anvil->Database->quote($set_by)." alert_set_by = ".$anvil->Database->quote($set_by)."
AND AND
@ -198,7 +198,7 @@ SELECT
FROM FROM
hosts hosts
WHERE WHERE
host_uuid = ".$anvil->Database->quote($anvil->data->{sys}{host_uuid})." host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
;"; ;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
@ -236,7 +236,7 @@ INSERT INTO
modified_date modified_date
) VALUES ( ) VALUES (
".$anvil->Database->quote($anvil->Get->uuid).", ".$anvil->Database->quote($anvil->Get->uuid).",
".$anvil->Database->quote($anvil->data->{sys}{host_uuid}).", ".$anvil->Database->quote($anvil->Get->host_uuid).",
".$anvil->Database->quote($set_by).", ".$anvil->Database->quote($set_by).",
".$anvil->Database->quote($record_locator).", ".$anvil->Database->quote($record_locator).",
".$anvil->Database->quote($name).", ".$anvil->Database->quote($name).",
@ -317,6 +317,12 @@ This is the message body of the alert. It is expected to be in the format C<< <s
Example with a message alone; C<< foo_0001 >>. Example with a message alone; C<< foo_0001 >>.
Example with two variables; C<< foo_0002,!!bar!abc!!,!!baz!123!! >>. Example with two variables; C<< foo_0002,!!bar!abc!!,!!baz!123!! >>.
B<< Note >>: See C<< message_variables >> for an alternate method of passing variables.
=head3 message_variables (optional)
This can be set as a hash reference containing C<< variable => value >> pairs to inject into the message key. The C<< variable => value >> pairs will be appended to the C<< message >> key automatically. This is meant to simplify things when an alert is also being logged, or when a large number of variables are being injected into the string.
=head3 set_by (required) =head3 set_by (required)
This is the name of the program that registered this alert. Usually this is simply the caller's C<< $THIS_FILE >> or C<< $0 >> variable. This is the name of the program that registered this alert. Usually this is simply the caller's C<< $THIS_FILE >> or C<< $0 >> variable.
@ -357,21 +363,23 @@ sub register
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Alert->register()" }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Alert->register()" }});
my $alert_level = defined $parameter->{alert_level} ? $parameter->{alert_level} : 0; my $alert_level = defined $parameter->{alert_level} ? $parameter->{alert_level} : 0;
my $clear_alert = defined $parameter->{clear_alert} ? $parameter->{clear_alert} : 0; my $clear_alert = defined $parameter->{clear_alert} ? $parameter->{clear_alert} : 0;
my $message = defined $parameter->{message} ? $parameter->{message} : ""; my $message = defined $parameter->{message} ? $parameter->{message} : "";
my $set_by = defined $parameter->{set_by} ? $parameter->{set_by} : ""; my $message_variables = defined $parameter->{message_variables} ? $parameter->{message_variables} : "",
my $show_header = defined $parameter->{show_header} ? $parameter->{show_header} : 1; my $set_by = defined $parameter->{set_by} ? $parameter->{set_by} : "";
my $sort_position = defined $parameter->{sort_position} ? $parameter->{sort_position} : 9999; my $show_header = defined $parameter->{show_header} ? $parameter->{show_header} : 1;
my $title = defined $parameter->{title} ? $parameter->{title} : ""; my $sort_position = defined $parameter->{sort_position} ? $parameter->{sort_position} : 9999;
my $title = defined $parameter->{title} ? $parameter->{title} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
show_header => $show_header, show_header => $show_header,
clear_alert => $clear_alert, clear_alert => $clear_alert,
alert_level => $alert_level, alert_level => $alert_level,
message => $message, message => $message,
set_by => $set_by, message_variables => ref($message_variables),
sort_position => $sort_position, set_by => $set_by,
title => $title, sort_position => $sort_position,
title => $title,
}}); }});
# Missing parameters? # Missing parameters?
@ -391,6 +399,15 @@ sub register
return("!!error!!"); return("!!error!!");
} }
if (ref($message_variables) eq "HASH")
{
foreach my $variable (sort {$a cmp $b} keys %{$message_variables})
{
my $value = defined $message_variables->{$variable} ? $message_variables->{$variable} : "undefined:".$variable;
$message .= ",!!".$variable."!".$value."!!";
}
}
# If the alert level was a string, convert it to the numerical version. Also check that we've got a # If the alert level was a string, convert it to the numerical version. Also check that we've got a
# sane alert level at all. # sane alert level at all.
if (lc($alert_level) eq "critical") if (lc($alert_level) eq "critical")

@ -182,7 +182,7 @@ This is the number of bytes that will be converted. This can be a signed integer
=head3 unit (optional) =head3 unit (optional)
This is a letter This is a letter that allows the caller to request the returned value be in a given unit, rather than the closest unit for the given value.
=cut =cut
sub bytes_to_human_readable sub bytes_to_human_readable
@ -194,36 +194,36 @@ sub bytes_to_human_readable
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Convert->bytes_to_human_readable()" }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Convert->bytes_to_human_readable()" }});
# Now see if the user passed the values in a hash reference or directly. # Now see if the user passed the values in a hash reference or directly.
my $size = defined $parameter->{'bytes'} ? $parameter->{'bytes'} : 0; my $bytes = defined $parameter->{'bytes'} ? $parameter->{'bytes'} : 0;
my $unit = defined $parameter->{unit} ? uc($parameter->{unit}) : ""; my $unit = defined $parameter->{unit} ? uc($parameter->{unit}) : "";
my $base2 = defined $parameter->{base2} ? $parameter->{base2} : $anvil->data->{sys}{use_base2}; my $base2 = defined $parameter->{base2} ? $parameter->{base2} : $anvil->data->{sys}{use_base2};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
size => $size, base2 => $base2,
unit => $unit, 'bytes' => $bytes,
base2 => $base2, unit => $unit,
}}); }});
# Expand exponential numbers. # Expand exponential numbers.
if ($size =~ /(\d+)e\+(\d+)/) if ($bytes =~ /(\d+)e\+(\d+)/)
{ {
my $base = $1; my $base = $1;
my $exp = $2; my $exp = $2;
$size = $base; $bytes = $base;
for (1..$exp) for (1..$exp)
{ {
$size .= "0"; $bytes .= "0";
} }
} }
# Setup my variables. # Setup my variables.
my $suffix = ""; my $suffix = "";
my $human_readable_size = $size; my $human_readable_size = $bytes;
# Store and strip the sign # Store and strip the sign
my $sign = ""; my $sign = "";
if ($human_readable_size =~ /^-/) if ($human_readable_size =~ /^-/)
{ {
$sign = "-"; $sign = "-";
$human_readable_size =~ s/^-//; $human_readable_size =~ s/^-//;
} }
$human_readable_size =~ s/,//g; $human_readable_size =~ s/,//g;
@ -240,6 +240,8 @@ sub bytes_to_human_readable
return ("!!error!!"); return ("!!error!!");
} }
### TODO: We process the bytes here, but maybe we shouldn't so that when this goes into an alert, it
### can be translated later.
# Do the math. # Do the math.
if ($base2) if ($base2)
{ {

File diff suppressed because it is too large Load Diff

@ -0,0 +1,177 @@
package Anvil::Tools::ScanCore;
#
# This module contains methods related to ScanCore and its scan agents.
#
use strict;
use warnings;
use Data::Dumper;
use Scalar::Util qw(weaken isweak);
use Data::Dumper;
use Time::HiRes qw(gettimeofday tv_interval);
use Text::Diff;
our $VERSION = "3.0.0";
my $THIS_FILE = "ScanCore.pm";
### Methods;
# agent_startup
=pod
=encoding utf8
=head1 NAME
Anvil::Tools::ScanCore
Provides all methods related to ScanCore and scan agents.
=head1 SYNOPSIS
use Anvil::Tools;
# Get a common object handle on all Anvil::Tools modules.
my $anvil = Anvil::Tools->new();
# Access to methods using '$anvil->ScanCore->X'.
#
# Example using 'agent_startup()';
my $problem = $anvil->ScanCore->agent_startup({agent => $THIS_FILE, tables => ["scan_hardware", "scan_hardware_ram_modules"]});
=head1 METHODS
Methods in this module;
=cut
sub new
{
	# Constructor; takes the class name and returns a new, empty hash-based object blessed into it.
	my ($class) = @_;
	
	my $self = bless({}, $class);
	
	return ($self);
}
# Stores (when one is passed in) and returns the handle on the Anvil::Tools object. Strictly speaking that
# object is a sibling module, but it is more natural here to treat it as the parent.
sub parent
{
	my ($self, $parent) = @_;
	
	# Only replace the stored handle when the caller actually gave us one.
	if ($parent)
	{
		$self->{HANDLE}{TOOLS} = $parent;
	}
	
	# Defend against memory leaks by holding the handle as a weak reference. See 'Scalar::Util'.
	weaken($self->{HANDLE}{TOOLS}) unless isweak($self->{HANDLE}{TOOLS});
	
	return ($self->{HANDLE}{TOOLS});
}
#############################################################################################################
# Public methods #
#############################################################################################################
# =head3
#
# Private Functions;
#
# =cut
#############################################################################################################
# Private functions #
#############################################################################################################
=head2 agent_startup

This method handles connecting to the databases, loading the agent's schema, resync'ing database tables if needed and reading in the words files.

If a required parameter is missing or invalid, C<< !!error!! >> is returned. If no database connection is available, or if there is a problem reading the agent's words file, C<< 1 >> is returned. Otherwise, C<< 0 >> is returned. This method returns to the caller in all cases; it is up to the caller to exit if needed.

Parameters;

=head3 agent (required)

This is the name of the scan agent. Usually this can be set as C<< $THIS_FILE >>.

=head3 tables (required)

This is an array reference of database tables to check when resync'ing. It is important that the tables are sorted in the order they need to be resync'ed in. (tables with primary keys before their foreign key tables).

=cut
sub agent_startup
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "ScanCore->agent_startup()" }});
	
	my $agent  = defined $parameter->{agent}  ? $parameter->{agent}  : "";
	my $tables = defined $parameter->{tables} ? $parameter->{tables} : "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		agent  => $agent, 
		tables => $tables, 
	}});
	
	# Both parameters are required, and 'tables' must be a non-empty array reference.
	if (not $agent)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "ScanCore->agent_startup()", parameter => "agent" }});
		return("!!error!!");
	}
	if ((not $tables) or (ref($tables) ne "ARRAY") or (@{$tables} == 0))
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "ScanCore->agent_startup()", parameter => "tables" }});
		return("!!error!!");
	}
	
	# Append our tables to the list of tables to check. This is done before connecting to the 
	# database(s), and the caller's order is preserved.
	foreach my $table (@{$tables})
	{
		push @{$anvil->data->{sys}{database}{check_tables}}, $table;
	}
	
	# Connect to DBs.
	$anvil->Database->connect({debug => ($debug + 1)});
	$anvil->Log->entry({source => $agent, line => __LINE__, level => $debug, secure => 0, key => "log_0132"});
	if (not $anvil->data->{sys}{database}{connections})
	{
		# No databases, return a problem so that the caller can exit.
		$anvil->Log->entry({source => $agent, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"});
		return(1);
	}
	
	# Make sure our schema is loaded.
	$anvil->Database->check_agent_data({
		debug => $debug,
		agent => $agent,
	});
	
	# Read in our word strings. The words file is expected at '<scan_agents>/<agent>/<agent>.xml'. These
	# are logged at the caller's debug level, like the other log calls in this method.
	my $words_file = $anvil->data->{path}{directories}{scan_agents}."/".$agent."/".$agent.".xml";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { words_file => $words_file }});
	
	my $problem = $anvil->Words->read({
		debug => ($debug + 1),
		file  => $words_file,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
	
	if ($problem)
	{
		# Something went wrong loading the file.
		return(1);
	}
	
	return(0);
}
1;

188
notes

@ -75,6 +75,194 @@ ALTER FUNCTION history_alerts() OWNER TO admin;
CREATE TRIGGER trigger_alerts CREATE TRIGGER trigger_alerts
AFTER INSERT OR UPDATE ON alerts AFTER INSERT OR UPDATE ON alerts
FOR EACH ROW EXECUTE PROCEDURE history_alerts(); FOR EACH ROW EXECUTE PROCEDURE history_alerts();
-- This stores weighted health of nodes. Agents can set one or more health values. After a scan sweep
-- completes, ScanCore will sum these weights and the node with the *highest* value is considered the
-- *least* healthy and any servers on it will be migrated to the peer.
CREATE TABLE health (
health_uuid uuid primary key,
health_host_uuid uuid not null, -- The host ('hosts' -> 'host_uuid') of the node or dashboard that this health came from.
health_agent_name text not null, -- This is the scan agent (or program name) setting this score.
health_source_name text not null, -- This is the name of the problem, as set by the agent.
health_source_weight numeric not null, -- This is the numerical weight of this alert. The higher this value, the more severe the health issue is.
modified_date timestamp with time zone not null,
FOREIGN KEY(health_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE health OWNER TO admin;
-- History copy of the 'health' table. It carries the same columns as 'health', plus a 'history_id' serial
-- column, and has no primary or foreign keys of its own.
CREATE TABLE history.health (
history_id bigserial,
health_uuid uuid not null,
health_host_uuid uuid not null,
health_agent_name text not null,
health_source_name text not null,
health_source_weight numeric not null,
modified_date timestamp with time zone not null
);
ALTER TABLE history.health OWNER TO admin;
-- Trigger function; reads the 'health' row matching the new 'health_uuid' and copies all of its columns into
-- 'history.health'. It returns NULL, which is fine as it is only used by the AFTER trigger below.
CREATE FUNCTION history_health() RETURNS trigger
AS $$
DECLARE
history_health RECORD;
BEGIN
SELECT INTO history_health * FROM health WHERE health_uuid = new.health_uuid;
INSERT INTO history.health
(health_uuid,
health_host_uuid,
health_agent_name,
health_source_name,
health_source_weight,
modified_date)
VALUES
(history_health.health_uuid,
history_health.health_host_uuid,
history_health.health_agent_name,
history_health.health_source_name,
history_health.health_source_weight,
history_health.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_health() OWNER TO admin;
-- Record a history entry after every INSERT or UPDATE of a 'health' row.
CREATE TRIGGER trigger_health
AFTER INSERT OR UPDATE ON health
FOR EACH ROW EXECUTE PROCEDURE history_health();
-- This stores the power state of the UPSes feeding a given host, as recorded by scan agents. Per the column
-- notes below, this is used to decide if a graceful power off is needed, and when the dashboard should boot
-- a node again after AC power is restored.
CREATE TABLE power (
power_uuid uuid primary key,
power_host_uuid uuid not null, -- The host ('hosts' -> 'host_uuid') of the node or dashboard that this power came from.
power_ups_uuid uuid not null, -- This is the 'upses' -> 'ups_uuid' of the UPS. This is used to map what UPSes are powering a given node.
power_agent_name text not null, -- This is the name of the scan agent that wrote a given entry.
power_on_battery boolean not null, -- TRUE == use the time remaining ('power_seconds_left') to determine if graceful power off is needed. FALSE == power loss NOT imminent, do not power off node.
power_seconds_left numeric, -- Should always be set, but not required *EXCEPT* when 'power_on_battery' is TRUE. NOTE(review): the schema itself allows NULL here in all cases; the writer must enforce this.
power_charge_percentage numeric, -- Percentage charge in the UPS. Used to determine when the dashboard should boot the node after AC restore.
modified_date timestamp with time zone not null,
FOREIGN KEY(power_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(power_ups_uuid) REFERENCES upses(ups_uuid)
);
ALTER TABLE power OWNER TO admin;
-- History copy of the 'power' table. It carries the same columns as 'power', plus a 'history_id' serial
-- column, and has no primary or foreign keys of its own.
-- NOTE(review): Only 'modified_date' is 'not null' here, where the 'power' table requires most columns to be
-- set. Confirm this is intentional.
CREATE TABLE history.power (
history_id bigserial,
power_uuid uuid,
power_host_uuid uuid,
power_ups_uuid uuid,
power_agent_name text,
power_on_battery boolean,
power_seconds_left numeric,
power_charge_percentage numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.power OWNER TO admin;
-- Trigger function; reads the 'power' row matching the new 'power_uuid' and copies all of its columns into
-- 'history.power'. It returns NULL, which is fine as it is only used by the AFTER trigger below.
CREATE FUNCTION history_power() RETURNS trigger
AS $$
DECLARE
history_power RECORD;
BEGIN
SELECT INTO history_power * FROM power WHERE power_uuid = new.power_uuid;
INSERT INTO history.power
(power_uuid,
power_host_uuid,
power_ups_uuid,
power_agent_name,
power_on_battery,
power_seconds_left,
power_charge_percentage,
modified_date)
VALUES
(history_power.power_uuid,
history_power.power_host_uuid,
history_power.power_ups_uuid,
history_power.power_agent_name,
history_power.power_on_battery,
history_power.power_seconds_left,
history_power.power_charge_percentage,
history_power.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_power() OWNER TO admin;
-- Record a history entry after every INSERT or UPDATE of a 'power' row.
CREATE TRIGGER trigger_power
AFTER INSERT OR UPDATE ON power
FOR EACH ROW EXECUTE PROCEDURE history_power();
-- This stores temperature information for a given host. ScanCore checks this data to decide if action needs
-- to be taken during a thermal event. On nodes, this is used to decide if a node should be shed or if an
-- Anvil! needs to be stopped entirely. On dashboards, this is used to check if/when it is safe to restart a
-- node that shut down because of a thermal event.
CREATE TABLE temperature (
temperature_uuid uuid primary key,
temperature_host_uuid uuid not null, -- The host ('hosts' -> 'host_uuid') of the node or dashboard that this temperature came from.
temperature_agent_name text not null, -- This is the name of the agent that set the alert.
temperature_sensor_host text not null, -- This is the host (uuid) that the sensor was read from. This is important as ScanCore on a striker will read available thermal data from a node using its IPMI data.
temperature_sensor_name text not null, -- This is the name of the sensor reporting the temperature.
temperature_celsius numeric not null, -- This is the actual temperature, in celsius of course.
temperature_state text not null, -- This is a string representing the state of the sensor. Valid values are 'ok', 'warning', and 'critical'.
temperature_is text not null, -- This indicates if the temperature is 'nominal', 'high' or 'low'.
modified_date timestamp with time zone not null,
FOREIGN KEY(temperature_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE temperature OWNER TO admin;
-- History copy of the 'temperature' table. It carries the same columns as 'temperature', plus a 'history_id'
-- serial column, and has no primary or foreign keys of its own.
CREATE TABLE history.temperature (
history_id bigserial,
temperature_uuid uuid not null,
temperature_host_uuid uuid not null,
temperature_agent_name text not null,
temperature_sensor_host text not null,
temperature_sensor_name text not null,
temperature_celsius numeric not null,
temperature_state text not null,
temperature_is text not null,
modified_date timestamp with time zone not null
);
ALTER TABLE history.temperature OWNER TO admin;
-- Trigger function; reads the 'temperature' row matching the new 'temperature_uuid' and copies all of its
-- columns into 'history.temperature'. It returns NULL, which is fine as it is only used by the AFTER trigger
-- below.
CREATE FUNCTION history_temperature() RETURNS trigger
AS $$
DECLARE
history_temperature RECORD;
BEGIN
SELECT INTO history_temperature * FROM temperature WHERE temperature_uuid = new.temperature_uuid;
INSERT INTO history.temperature
(temperature_uuid,
temperature_host_uuid,
temperature_agent_name,
temperature_sensor_host,
temperature_sensor_name,
temperature_celsius,
temperature_state,
temperature_is,
modified_date)
VALUES
(history_temperature.temperature_uuid,
history_temperature.temperature_host_uuid,
history_temperature.temperature_agent_name,
history_temperature.temperature_sensor_host,
history_temperature.temperature_sensor_name,
history_temperature.temperature_celsius,
history_temperature.temperature_state,
history_temperature.temperature_is,
history_temperature.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_temperature() OWNER TO admin;
-- Record a history entry after every INSERT or UPDATE of a 'temperature' row.
CREATE TRIGGER trigger_temperature
AFTER INSERT OR UPDATE ON temperature
FOR EACH ROW EXECUTE PROCEDURE history_temperature();
COMMIT; COMMIT;
============ ============

@ -38,6 +38,7 @@ Requires: expect
Requires: fence-agents-all Requires: fence-agents-all
Requires: fence-agents-virsh Requires: fence-agents-virsh
Requires: firewalld Requires: firewalld
Requires: freeipmi
Requires: gpm Requires: gpm
Requires: hdparm Requires: hdparm
Requires: htop Requires: htop

File diff suppressed because it is too large Load Diff

@ -1,155 +1,156 @@
-- This is the database schema for the 'hardware Scan Agent'. -- This is the database schema for the 'scan-hardware Scan Agent'.
CREATE TABLE hardware ( CREATE TABLE scan_hardware (
hardware_uuid uuid primary key, scan_hardware_uuid uuid primary key,
hardware_host_uuid uuid not null, scan_hardware_host_uuid uuid not null,
hardware_cpu_model text not null, scan_hardware_cpu_model text not null,
hardware_cpu_cores numeric not null, -- We don't care about individual sockets / chips scan_hardware_cpu_cores numeric not null, -- We don't care about individual sockets / chips
hardware_cpu_threads numeric not null, scan_hardware_cpu_threads numeric not null,
hardware_cpu_bugs text not null, scan_hardware_cpu_bugs text not null,
hardware_cpu_flags text not null, -- scan_hardware_cpu_flags text not null, --
hardware_ram_total numeric not null, -- This is the sum of the hardware memory module capacity scan_hardware_ram_total numeric not null, -- This is the sum of the hardware memory module capacity
hardware_memory_total numeric not null, -- This is the amount seen by the OS, minus shared memory, like that allocated to video scan_hardware_memory_total numeric not null, -- This is the amount seen by the OS, minus shared memory, like that allocated to video
hardware_memory_free numeric not null, -- scan_hardware_memory_free numeric not null, --
hardware_swap_total numeric not null, -- scan_hardware_swap_total numeric not null, --
hardware_swap_free numeric not null, -- scan_hardware_swap_free numeric not null, --
hardware_led_id text not null, -- scan_hardware_led_id text not null, --
hardware_led_css text not null, -- scan_hardware_led_css text not null, --
hardware_led_error text not null, -- scan_hardware_led_error text not null, --
modified_date timestamp with time zone not null, modified_date timestamp with time zone not null,
FOREIGN KEY(hardware_host_uuid) REFERENCES hosts(host_uuid) FOREIGN KEY(scan_hardware_host_uuid) REFERENCES hosts(host_uuid)
); );
ALTER TABLE hardware OWNER TO admin; ALTER TABLE scan_hardware OWNER TO admin;
CREATE TABLE history.hardware ( CREATE TABLE history.scan_hardware (
history_id bigserial, history_id bigserial,
hardware_uuid uuid, scan_hardware_uuid uuid,
hardware_host_uuid uuid, scan_hardware_host_uuid uuid,
hardware_cpu_model text, scan_hardware_cpu_model text,
hardware_cpu_cores numeric, scan_hardware_cpu_cores numeric,
hardware_cpu_threads numeric, scan_hardware_cpu_threads numeric,
hardware_cpu_bugs text, scan_hardware_cpu_bugs text,
hardware_cpu_flags text, scan_hardware_cpu_flags text,
hardware_ram_total numeric, scan_hardware_ram_total numeric,
hardware_memory_total numeric, scan_hardware_memory_total numeric,
hardware_memory_free numeric, scan_hardware_memory_free numeric,
hardware_swap_total numeric, scan_hardware_swap_total numeric,
hardware_swap_free numeric, scan_hardware_swap_free numeric,
hardware_led_id text, scan_hardware_led_id text,
hardware_led_css text, scan_hardware_led_css text,
hardware_led_error text, scan_hardware_led_error text,
modified_date timestamp with time zone not null modified_date timestamp with time zone not null
); );
ALTER TABLE history.hardware OWNER TO admin; ALTER TABLE history.scan_hardware OWNER TO admin;
CREATE FUNCTION history_hardware() RETURNS trigger CREATE FUNCTION history_scan_hardware() RETURNS trigger
AS $$ AS $$
DECLARE DECLARE
history_hardware RECORD; history_scan_hardware RECORD;
BEGIN BEGIN
SELECT INTO history_hardware * FROM hardware WHERE hardware_uuid=new.hardware_uuid; SELECT INTO history_scan_hardware * FROM scan_hardware WHERE scan_hardware_uuid=new.scan_hardware_uuid;
INSERT INTO history.hardware INSERT INTO history.scan_hardware
(hardware_uuid, (scan_hardware_uuid,
hardware_host_uuid, scan_hardware_host_uuid,
hardware_cpu_model, scan_hardware_cpu_model,
hardware_cpu_cores, scan_hardware_cpu_cores,
hardware_cpu_threads, scan_hardware_cpu_threads,
hardware_cpu_bugs, scan_hardware_cpu_bugs,
hardware_cpu_flags, scan_hardware_cpu_flags,
hardware_ram_total, scan_hardware_ram_total,
hardware_memory_total, scan_hardware_memory_total,
hardware_memory_free, scan_hardware_memory_free,
hardware_swap_total, scan_hardware_swap_total,
hardware_swap_free, scan_hardware_swap_free,
hardware_led_id, scan_hardware_led_id,
hardware_led_css, scan_hardware_led_css,
hardware_led_error, scan_hardware_led_error,
modified_date) modified_date)
VALUES VALUES
(history_hardware.hardware_uuid, (history_scan_hardware.scan_hardware_uuid,
history_hardware.hardware_host_uuid, history_scan_hardware.scan_hardware_host_uuid,
history_hardware.hardware_cpu_model, history_scan_hardware.scan_hardware_cpu_model,
history_hardware.hardware_cpu_cores, history_scan_hardware.scan_hardware_cpu_cores,
history_hardware.hardware_cpu_threads, history_scan_hardware.scan_hardware_cpu_threads,
history_hardware.hardware_cpu_bugs, history_scan_hardware.scan_hardware_cpu_bugs,
history_hardware.hardware_cpu_flags, history_scan_hardware.scan_hardware_cpu_flags,
history_hardware.hardware_ram_total, history_scan_hardware.scan_hardware_ram_total,
history_hardware.hardware_memory_total, history_scan_hardware.scan_hardware_memory_total,
history_hardware.hardware_memory_free, history_scan_hardware.scan_hardware_memory_free,
history_hardware.hardware_swap_total, history_scan_hardware.scan_hardware_swap_total,
history_hardware.hardware_swap_free, history_scan_hardware.scan_hardware_swap_free,
history_hardware.hardware_led_id, history_scan_hardware.scan_hardware_led_id,
history_hardware.hardware_led_css, history_scan_hardware.scan_hardware_led_css,
history_hardware.hardware_led_error, history_scan_hardware.scan_hardware_led_error,
history_hardware.modified_date); history_scan_hardware.modified_date);
RETURN NULL; RETURN NULL;
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;
ALTER FUNCTION history_hardware() OWNER TO admin; ALTER FUNCTION history_scan_hardware() OWNER TO admin;
CREATE TRIGGER trigger_hardware CREATE TRIGGER trigger_scan_hardware
AFTER INSERT OR UPDATE ON hardware AFTER INSERT OR UPDATE ON scan_hardware
FOR EACH ROW EXECUTE PROCEDURE history_hardware(); FOR EACH ROW EXECUTE PROCEDURE history_scan_hardware();
CREATE TABLE hardware_ram_modules (
hardware_ram_module_uuid uuid primary key, CREATE TABLE scan_hardware_ram_modules (
hardware_ram_module_host_uuid uuid not null, scan_hardware_ram_module_uuid uuid primary key,
hardware_ram_module_locator text not null, scan_hardware_ram_module_host_uuid uuid not null,
hardware_ram_module_size numeric not null, scan_hardware_ram_module_locator text not null,
hardware_ram_module_manufacturer text not null, scan_hardware_ram_module_size numeric not null,
hardware_ram_module_model text not null, scan_hardware_ram_module_manufacturer text not null,
hardware_ram_module_serial_number text not null, scan_hardware_ram_module_model text not null,
modified_date timestamp with time zone not null, scan_hardware_ram_module_serial_number text not null,
modified_date timestamp with time zone not null,
FOREIGN KEY(hardware_ram_module_host_uuid) REFERENCES hosts(host_uuid) FOREIGN KEY(scan_hardware_ram_module_host_uuid) REFERENCES hosts(host_uuid)
); );
ALTER TABLE hardware_ram_modules OWNER TO admin; ALTER TABLE scan_hardware_ram_modules OWNER TO admin;
CREATE TABLE history.hardware_ram_modules ( CREATE TABLE history.scan_hardware_ram_modules (
history_id bigserial, history_id bigserial,
hardware_ram_module_uuid uuid, scan_hardware_ram_module_uuid uuid,
hardware_ram_module_host_uuid uuid, scan_hardware_ram_module_host_uuid uuid,
hardware_ram_module_locator text, scan_hardware_ram_module_locator text,
hardware_ram_module_size numeric, scan_hardware_ram_module_size numeric,
hardware_ram_module_manufacturer text, scan_hardware_ram_module_manufacturer text,
hardware_ram_module_model text, scan_hardware_ram_module_model text,
hardware_ram_module_serial_number text, scan_hardware_ram_module_serial_number text,
modified_date timestamp with time zone not null modified_date timestamp with time zone not null
); );
ALTER TABLE history.hardware_ram_modules OWNER TO admin; ALTER TABLE history.scan_hardware_ram_modules OWNER TO admin;
CREATE FUNCTION history_hardware_ram_modules() RETURNS trigger CREATE FUNCTION history_scan_hardware_ram_modules() RETURNS trigger
AS $$ AS $$
DECLARE DECLARE
history_hardware_ram_modules RECORD; history_scan_hardware_ram_modules RECORD;
BEGIN BEGIN
SELECT INTO history_hardware_ram_modules * FROM hardware_ram_modules WHERE hardware_ram_module_uuid=new.hardware_ram_module_uuid; SELECT INTO history_scan_hardware_ram_modules * FROM scan_hardware_ram_modules WHERE scan_hardware_ram_module_uuid=new.scan_hardware_ram_module_uuid;
INSERT INTO history.hardware_ram_modules INSERT INTO history.scan_hardware_ram_modules
(hardware_ram_module_uuid, (scan_hardware_ram_module_uuid,
hardware_ram_module_host_uuid, scan_hardware_ram_module_host_uuid,
hardware_ram_module_locator, scan_hardware_ram_module_locator,
hardware_ram_module_size, scan_hardware_ram_module_size,
hardware_ram_module_manufacturer, scan_hardware_ram_module_manufacturer,
hardware_ram_module_model, scan_hardware_ram_module_model,
hardware_ram_module_serial_number, scan_hardware_ram_module_serial_number,
modified_date) modified_date)
VALUES VALUES
(history_hardware_ram_modules.hardware_ram_module_uuid, (history_scan_hardware_ram_modules.scan_hardware_ram_module_uuid,
history_hardware_ram_modules.hardware_ram_module_host_uuid, history_scan_hardware_ram_modules.scan_hardware_ram_module_host_uuid,
history_hardware_ram_modules.hardware_ram_module_locator, history_scan_hardware_ram_modules.scan_hardware_ram_module_locator,
history_hardware_ram_modules.hardware_ram_module_size, history_scan_hardware_ram_modules.scan_hardware_ram_module_size,
history_hardware_ram_modules.hardware_ram_module_manufacturer, history_scan_hardware_ram_modules.scan_hardware_ram_module_manufacturer,
history_hardware_ram_modules.hardware_ram_module_model, history_scan_hardware_ram_modules.scan_hardware_ram_module_model,
history_hardware_ram_modules.hardware_ram_module_serial_number, history_scan_hardware_ram_modules.scan_hardware_ram_module_serial_number,
history_hardware_ram_modules.modified_date); history_scan_hardware_ram_modules.modified_date);
RETURN NULL; RETURN NULL;
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;
ALTER FUNCTION history_hardware_ram_modules() OWNER TO admin; ALTER FUNCTION history_scan_hardware_ram_modules() OWNER TO admin;
CREATE TRIGGER trigger_hardware_ram_modules CREATE TRIGGER trigger_scan_hardware_ram_modules
AFTER INSERT OR UPDATE ON hardware_ram_modules AFTER INSERT OR UPDATE ON scan_hardware_ram_modules
FOR EACH ROW EXECUTE PROCEDURE history_hardware_ram_modules(); FOR EACH ROW EXECUTE PROCEDURE history_scan_hardware_ram_modules();

@ -13,8 +13,149 @@ NOTE: All string keys MUST be prefixed with the agent name! ie: 'scan_hardware_l
<!-- Canadian English --> <!-- Canadian English -->
<language name="en_CA" long_name="Canadian English" description="ScanCore scan agent that monitors hardware, like RAM modules, CSS LED status, CPU information, etc."> <language name="en_CA" long_name="Canadian English" description="ScanCore scan agent that monitors hardware, like RAM modules, CSS LED status, CPU information, etc.">
<!-- Alert entries -->
<key name="scan_hardware_alert_0001">
For some reason, two (or more) CPU cores/threads returned different flags. This should never happen.
The differences are:
====
#!variable!flags!#
====
#!variable!these_flags!#
====
</key>
<key name="scan_hardware_alert_0002">The issue with mismatched CPU flags has been resolved.</key>
<key name="scan_hardware_alert_0003">
For some reason, two (or more) CPU cores/threads returned different bugs. This should never happen.
The differences are:
====
#!variable!bugs!#
====
#!variable!these_bugs!#
====
</key>
<key name="scan_hardware_alert_0004">The issue with mismatched CPU bugs has been resolved.</key>
<key name="scan_hardware_alert_0005">
For some reason, two (or more) CPU cores/threads returned different model names. This should never happen.
The differences are:
====
#!variable!model!#
====
#!variable!this_model!#
====
</key>
<key name="scan_hardware_alert_0006">The issue with mismatched CPU model name has been resolved.</key>
<key name="scan_hardware_alert_0007">The CPU model has changed:
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0008">The CPU bugs list has changed:
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0009">The CPU flags list (register list) has changed:
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0010">The number of CPU cores has changed. Was a new CPU installed?
- New: [#!variable!new!# core(s)]
- Old: [#!variable!old!# core(s)]
</key>
<key name="scan_hardware_alert_0011">The number of CPU threads has changed. Was a new CPU installed?
- New: [#!variable!new!# thread(s)]
- Old: [#!variable!old!# thread(s)]
</key>
<key name="scan_hardware_alert_0012">The amount of RAM (as reported by dmidecode) on the system has changed. If there was a hardware upgrade, then this is safe to ignore. If it was unexpected, a RAM module may have failed.
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0013">The amount of memory (as reported by /proc/meminfo) on the system has changed. If there was a hardware upgrade, then this is safe to ignore. If it was unexpected, a RAM module may have failed.
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0014">The amount of memory (as reported by /proc/meminfo) on the system has changed. If there was a hardware upgrade, then this is safe to ignore. If it was unexpected, a RAM module may have failed.
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0015">The ID LED (identification light) state has changed;
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0016">The Error (CSS) LED state has changed;
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0017">The Error (Hardware) LED state has changed;
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0018">The amount of available memory (as reported by /proc/meminfo) has changed (this is common and expected);
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0019">The amount of available swap space (as reported by /proc/meminfo) has changed (this is common and expected);
- New: [#!variable!new!#]
- Old: [#!variable!old!#]
</key>
<key name="scan_hardware_alert_0020">The amount of swap is high! The swap is now: [#!variable!say_used!#] of: [#!variable!say_swap!#] (#!variable!swap_percent!#% used).</key>
<key name="scan_hardware_alert_0021">The amount is back down to a low amount used. The swap is now: [#!variable!say_used!#] of: [#!variable!say_swap!#] (#!variable!swap_percent!#% used).</key>
<key name="scan_hardware_alert_0022">The CPU, RAM and LED (if possible) have been recorded;
- CPU Model: ... [#!variable!cpu_model!#]
- Total Cores: . [#!variable!total_cores!#]
- Total Threads: [#!variable!total_threads!#]
- CPU Bugs: .... [#!variable!cpu_bugs!#]
- CPU Flags: ... [#!variable!cpu_flags!#]
- ID LED state: [#!variable!id_led!#]
- CSS LED: ..... [#!variable!css_led!#] (CSS = Customer Self-Service)
- Error LED: ... [#!variable!error_led!#]
- Total RAM: ... [#!variable!ram_total_size!#]
- Usable RAM: .. [#!variable!ram_memory_total!#] (Available to the OS)
- Free Memory: . [#!variable!ram_memory_free!#]
- Total swap: .. [#!variable!ram_swap_total!#]
- Free swap: ... [#!variable!ram_swap_free!#]
</key>
<key name="scan_hardware_alert_0023">The RAM module [#!variable!locator!#] has returned!
- Size: ........ [#!variable!old_size!#]
- Manufacturer: [#!variable!old_manufacturer!#]
- Model: ....... [#!variable!old_model!#]
- Serial Number: [#!variable!old_serial_number!#]
</key>
<key name="scan_hardware_alert_0024">Something about the RAM module [#!variable!locator!#] has changed.
This shouldn't normally happen. Was the RAM module replaced?
- Size: ........ [#!variable!old_size!#] -> [#!variable!new_size!#]
- Manufacturer: [#!variable!old_manufacturer!#] -> [#!variable!new_manufacturer!#]
- Model: ....... [#!variable!old_model!#] -> [#!variable!new_model!#]
- Serial Number: [#!variable!old_serial_number!#] -> [#!variable!new_serial_number!#]
</key>
<key name="scan_hardware_alert_0025">A new RAM memory module has been found;
- Locator: ..... [#!variable!locator!#]
- Size: ........ [#!variable!size!#]
- Manufacturer: [#!variable!manufacturer!#]
- Model: ....... [#!variable!model!#]
- Serial Number: [#!variable!serial_number!#]
</key>
<key name="scan_hardware_alert_0026">The RAM module [#!variable!locator!#] has vanished!
Was the module intentionally removed? If not, it may have failed.
- Size: ........ [#!variable!old_size!#]
- Manufacturer: [#!variable!old_manufacturer!#]
- Model: ....... [#!variable!old_model!#]
- Serial Number: [#!variable!old_serial_number!#]
</key>
<key name="scan_hardware_alert_0027">This node has: [#!variable!difference!#] less RAM than the peer node.
If the RAM is being updated, this alert will clear once this node has been upgraded to have the same amount of RAM. If a memory module has failed, this warning will clear when the module has been replaced.
- Local RAM: [#!variable!local_ram!#]
- Peer's RAM: [#!variable!peer_ram!#]
</key>
<!-- Log entries --> <!-- Log entries -->
<key name="scan_hardware_log_0001">Starting: [#!variable!program!#].</key> <key name="scan_hardware_log_0001">Starting: [#!variable!program!#].</key>
<!-- Message entries (usually meant to be alerts) -->
<key name="scan_hardware_message_0001">Unknown</key>
<key name="scan_hardware_message_0002">Lit</key>
<key name="scan_hardware_message_0003">Off</key>
<!-- Units -->
<key name="scan_hardware_unit_0001">bytes</key>
</language> </language>
</words> </words>

@ -1625,6 +1625,192 @@ CREATE TRIGGER trigger_upses
FOR EACH ROW EXECUTE PROCEDURE history_upses(); FOR EACH ROW EXECUTE PROCEDURE history_upses();
-- This is used to indicate the power state of UPSes. It is used to determine when the system needs to be
-- powered off. All UPS-type scan agents must use this table. The linkage between this and the 'upses' table
-- will be sorted out automatically based on the scan agent used and the UPS host name / IP address.
CREATE TABLE power (
power_uuid uuid primary key,
power_ups_uuid uuid not null, -- This is the 'upses' -> 'ups_uuid' of the UPS. This is used to map what UPSes are powering a given node.
power_on_battery boolean not null, -- TRUE == use 'power_seconds_left' to determine if graceful power off is needed. FALSE == power loss NOT imminent, do not power off node.
power_seconds_left numeric, -- Should always be set, but not required *EXCEPT* when 'power_on_battery' is TRUE.
power_charge_percentage numeric, -- Percentage charge in the UPS. Used to determine when the dashboard should boot the node after AC restore.
modified_date timestamp with time zone not null,
FOREIGN KEY(power_ups_uuid) REFERENCES upses(ups_uuid)
);
ALTER TABLE power OWNER TO admin;
-- History copy of the 'power' table. It carries the same columns as 'power' plus an auto-incrementing
-- 'history_id'. Only 'modified_date' is declared NOT NULL here.
CREATE TABLE history.power (
history_id bigserial,
power_uuid uuid,
power_ups_uuid uuid,
power_on_battery boolean,
power_seconds_left numeric,
power_charge_percentage numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.power OWNER TO admin;
-- Trigger function for the 'power' table. It re-reads the row that was just written (matched on
-- 'power_uuid') and appends a copy of every column to 'history.power'.
CREATE FUNCTION history_power() RETURNS trigger
AS $$
DECLARE
history_power RECORD;
BEGIN
-- Load the current state of the row that fired the trigger.
SELECT INTO history_power * FROM power WHERE power_uuid = new.power_uuid;
INSERT INTO history.power
(power_uuid,
power_ups_uuid,
power_on_battery,
power_seconds_left,
power_charge_percentage,
modified_date)
VALUES
(history_power.power_uuid,
history_power.power_ups_uuid,
history_power.power_on_battery,
history_power.power_seconds_left,
history_power.power_charge_percentage,
history_power.modified_date);
-- This runs as an AFTER row trigger (see below), so there is no row to hand back.
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_power() OWNER TO admin;
-- Record history on every insert or update of a 'power' row.
CREATE TRIGGER trigger_power
AFTER INSERT OR UPDATE ON power
FOR EACH ROW EXECUTE PROCEDURE history_power();
-- This stores weighted health of nodes. Agents can set one or more health values. After a scan sweep
-- completes, ScanCore will sum these weights and the node with the *highest* value is considered the
-- *least* healthy and any servers on it will be migrated to the peer.
CREATE TABLE health (
health_uuid uuid primary key,
health_host_uuid uuid not null, -- This is the 'hosts' -> 'host_uuid' of the node or dashboard that this health came from.
health_agent_name text not null, -- This is the scan agent (or program name) setting this score.
health_source_name text not null, -- This is the name of the problem, as set by the agent.
health_source_weight numeric not null, -- This is the numerical weight of this alert. The higher this value, the more severe the health issue is.
modified_date timestamp with time zone not null,
FOREIGN KEY(health_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE health OWNER TO admin;
-- History copy of the 'health' table. It carries the same columns as 'health' plus an auto-incrementing
-- 'history_id'. Every copied column is declared NOT NULL here.
CREATE TABLE history.health (
history_id bigserial,
health_uuid uuid not null,
health_host_uuid uuid not null,
health_agent_name text not null,
health_source_name text not null,
health_source_weight numeric not null,
modified_date timestamp with time zone not null
);
ALTER TABLE history.health OWNER TO admin;
-- Trigger function for the 'health' table. It re-reads the row that was just written (matched on
-- 'health_uuid') and appends a copy of every column to 'history.health'.
CREATE FUNCTION history_health() RETURNS trigger
AS $$
DECLARE
history_health RECORD;
BEGIN
-- Load the current state of the row that fired the trigger.
SELECT INTO history_health * FROM health WHERE health_uuid = new.health_uuid;
INSERT INTO history.health
(health_uuid,
health_host_uuid,
health_agent_name,
health_source_name,
health_source_weight,
modified_date)
VALUES
(history_health.health_uuid,
history_health.health_host_uuid,
history_health.health_agent_name,
history_health.health_source_name,
history_health.health_source_weight,
history_health.modified_date);
-- This runs as an AFTER row trigger (see below), so there is no row to hand back.
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_health() OWNER TO admin;
-- Record history on every insert or update of a 'health' row.
CREATE TRIGGER trigger_health
AFTER INSERT OR UPDATE ON health
FOR EACH ROW EXECUTE PROCEDURE history_health();
-- This stores temperature information for a given host. ScanCore checks this data to decide if action needs
-- to be taken during a thermal event. On nodes, this is used to decide if a node should be shed or if an
-- Anvil! needs to be stopped entirely. On dashboards, this is used to check if/when it is safe to restart a
-- node that shut down because of a thermal event.
CREATE TABLE temperature (
temperature_uuid uuid primary key,
temperature_host_uuid uuid not null, -- This is the 'hosts' -> 'host_uuid' of the node or dashboard that this temperature came from.
temperature_agent_name text not null, -- This is the name of the agent that set the alert.
temperature_sensor_host text not null, -- This is the host (uuid) that the sensor was read from. This is important as ScanCore on a striker will read available thermal data from a node using its IPMI data.
temperature_sensor_name text not null, -- This is the name of the sensor reporting the temperature.
temperature_value_c numeric not null, -- This is the actual temperature, in Celsius.
temperature_weight numeric not null, -- This is the weight of the sensor value. This is the value added to the sum when testing against 'scancore::threshold::warning_temperature' and 'scancore::threshold::warning_critical'.
temperature_state text not null, -- This is a string representing the state of the sensor. Valid values are 'ok', 'warning', and 'critical'.
temperature_is text not null, -- This indicates if the temperature is 'nominal', 'high' or 'low'.
modified_date timestamp with time zone not null,
FOREIGN KEY(temperature_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE temperature OWNER TO admin;
-- History copy of the 'temperature' table. It carries the same columns as 'temperature' plus an
-- auto-incrementing 'history_id'. Only 'modified_date' is declared NOT NULL here.
CREATE TABLE history.temperature (
history_id bigserial,
temperature_uuid uuid,
temperature_host_uuid uuid,
temperature_agent_name text,
temperature_sensor_host text,
temperature_sensor_name text,
temperature_value_c numeric,
temperature_weight numeric,
temperature_state text,
temperature_is text,
modified_date timestamp with time zone not null
);
ALTER TABLE history.temperature OWNER TO admin;
-- Trigger function for the 'temperature' table. It re-reads the row that was just written (matched on
-- 'temperature_uuid') and appends a copy of every column to 'history.temperature'.
--
-- NOTE: 'temperature_weight' must be in both the column list and the VALUES list. If it is left out,
--       the history rows silently record NULL for the weight.
CREATE FUNCTION history_temperature() RETURNS trigger
AS $$
DECLARE
    history_temperature RECORD;
BEGIN
    -- Load the current state of the row that fired the trigger.
    SELECT INTO history_temperature * FROM temperature WHERE temperature_uuid = new.temperature_uuid;
    INSERT INTO history.temperature
        (temperature_uuid,
         temperature_host_uuid,
         temperature_agent_name,
         temperature_sensor_host,
         temperature_sensor_name,
         temperature_value_c,
         temperature_weight,
         temperature_state,
         temperature_is,
         modified_date)
    VALUES
        (history_temperature.temperature_uuid,
         history_temperature.temperature_host_uuid,
         history_temperature.temperature_agent_name,
         history_temperature.temperature_sensor_host,
         history_temperature.temperature_sensor_name,
         history_temperature.temperature_value_c,
         history_temperature.temperature_weight,
         history_temperature.temperature_state,
         history_temperature.temperature_is,
         history_temperature.modified_date);
    -- This runs as an AFTER row trigger (see below), so there is no row to hand back.
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_temperature() OWNER TO admin;

-- Record history on every insert or update of a 'temperature' row.
CREATE TRIGGER trigger_temperature
    AFTER INSERT OR UPDATE ON temperature
    FOR EACH ROW EXECUTE PROCEDURE history_temperature();
-- ------------------------------------------------------------------------------------------------------- -- -- ------------------------------------------------------------------------------------------------------- --
-- These are special tables with no history or tracking UUIDs that simply record transient information. -- -- These are special tables with no history or tracking UUIDs that simply record transient information. --
-- ------------------------------------------------------------------------------------------------------- -- -- ------------------------------------------------------------------------------------------------------- --

@ -1031,6 +1031,8 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0543">I was asked to process alerts, but there are no configured email servers. No sense proceeding.</key> <key name="log_0543">I was asked to process alerts, but there are no configured email servers. No sense proceeding.</key>
<key name="log_0544">The table: [#!variable!table!#] already exists in the database on the host: [#!variable!host!#], no need to load the schema.</key> <key name="log_0544">The table: [#!variable!table!#] already exists in the database on the host: [#!variable!host!#], no need to load the schema.</key>
<key name="log_0545">The table: [#!variable!table!#] does NOT exists in the database on the host: [#!variable!host!#]. Will load the schema file: [#!variable!file!#] now.</key> <key name="log_0545">The table: [#!variable!table!#] does NOT exists in the database on the host: [#!variable!host!#]. Will load the schema file: [#!variable!file!#] now.</key>
<key name="log_0546">The passed in 'temperature_state' value: [#!variable!temperature_state!#] is invalid. The value must be 'ok', 'warning' or 'critical'.</key>
<key name="log_0547">The passed in 'temperature_is' value: [#!variable!temperature_is!#] is invalid. The value must be 'nominal', 'high' or 'low'.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -1611,6 +1613,7 @@ If you are comfortable that the target has changed for a known reason, you can s
<key name="striker_0275">Free-form description of this system.</key> <key name="striker_0275">Free-form description of this system.</key>
<key name="striker_0276">This tracks the last time a given mail server was configured for use. It allows for a round-robin switching of mail servers when one mail server stops working and two or more mail servers have been configured.</key> <key name="striker_0276">This tracks the last time a given mail server was configured for use. It allows for a round-robin switching of mail servers when one mail server stops working and two or more mail servers have been configured.</key>
<key name="striker_0277">No UPSes</key> <key name="striker_0277">No UPSes</key>
<key name="striker_0278">This is a condition record, used by programs like scan agents to track how long a condition has existed for.</key>
<!-- These are generally units and appended to numbers --> <!-- These are generally units and appended to numbers -->
<key name="suffix_0001">#!variable!number!#/sec</key> <key name="suffix_0001">#!variable!number!#/sec</key>
@ -1726,7 +1729,7 @@ Here we will inject 't_0006', which injects 't_0001' which has a variable: [#!st
<key name="unit_0025">Warning</key> <!-- Alert level 2 --> <key name="unit_0025">Warning</key> <!-- Alert level 2 -->
<key name="unit_0026">Notice</key> <!-- Alert level 3 --> <key name="unit_0026">Notice</key> <!-- Alert level 3 -->
<key name="unit_0027">Info</key> <!-- Alert level 4 --> <key name="unit_0027">Info</key> <!-- Alert level 4 -->
<key name="unit_0028"></key> <!-- free --> <key name="unit_0028">Lit</key> <!-- LED State On -->
<key name="unit_0029">Up</key> <key name="unit_0029">Up</key>
<key name="unit_0030">Down</key> <key name="unit_0030">Down</key>
<key name="unit_0031">Mbps</key> <key name="unit_0031">Mbps</key>

@ -29,6 +29,13 @@ if (($running_directory =~ /^\./) && ($ENV{PWD}))
my $anvil = Anvil::Tools->new({log_level => 2, log_secure => 1}); my $anvil = Anvil::Tools->new({log_level => 2, log_secure => 1});
# Default ScanCore thresholds. These are set before the configuration is read by the
# $anvil->Storage->read_config() call that follows.
# NOTE(review): This assigns the whole 'scancore' hash, so any 'scancore' keys already present in
#               $anvil->data at this point would be replaced - confirm that is intended.
$anvil->data->{scancore} = {
threshold => {
warning_temperature => 5,
warning_critical => 5,
},
};
$anvil->Storage->read_config(); $anvil->Storage->read_config();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0115", variables => { program => $THIS_FILE }});

@ -28,18 +28,3 @@ $anvil->Get->switches;
print "Connecting to the database(s);\n"; print "Connecting to the database(s);\n";
$anvil->Database->connect({debug => 3}); $anvil->Database->connect({debug => 3});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"});
if (0)
{
$anvil->Alert->register({
debug => 2,
alert_level => "warning",
message => "message_0002",
set_by => $THIS_FILE,
});
}
if (1)
{
### TODO: Left off here. Remove 'alert_title_X' keys and continue testing email body generation.
$anvil->Email->send_alerts({debug => 2});
}

Loading…
Cancel
Save