* Started work on the scan-drbd scan agent. Got it to the point that it is gathering needed data.

* Fixed a bug in Database->check_agent_data() where the list of tables wasn't accepted as a parameter, so it was never passed on to Database->_find_behind_databases().
* Started work on a new method called Storage->parse_lsblk().

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent cda51e562d
commit 4d5ec72026
  1. 1
      Anvil/Tools.pm
  2. 29
      Anvil/Tools/Database.pm
  3. 20
      Anvil/Tools/ScanCore.pm
  4. 56
      Anvil/Tools/Storage.pm
  5. 477
      notes
  6. 464
      scancore-agents/scan-drbd/scan-drbd
  7. 357
      scancore-agents/scan-drbd/scan-drbd.sql
  8. 39
      scancore-agents/scan-drbd/scan-drbd.xml
  9. 56
      tools/test.pl

@ -1125,6 +1125,7 @@ sub _set_paths
'anvil-update-files' => "/usr/sbin/anvil-update-files",
'anvil-update-states' => "/usr/sbin/anvil-update-states",
'anvil-update-system' => "/usr/sbin/anvil-update-system",
blockdev => "/usr/sbin/blockdev",
bridge => "/usr/sbin/bridge",
bzip2 => "/usr/bin/bzip2",
'call_striker-get-peer-data' => "/usr/sbin/call_striker-get-peer-data",

@ -377,6 +377,10 @@ Parameters;
This is the name of the calling scan agent. The name is used to find the schema file under C<< <path::directories::scan_agents>/<agent>/<agent>.sql >>.
=head3 tables (required)
This is the array reference of tables used to check if any databases are behind and need a resync.
=cut
sub check_agent_data
{
@ -387,8 +391,10 @@ sub check_agent_data
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->check_agent_data()" }});
my $agent = defined $parameter->{agent} ? $parameter->{agent} : "";
my $tables = defined $parameter->{tables} ? $parameter->{tables} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
agent => $agent,
tables => $tables,
}});
if (not $agent)
@ -396,6 +402,11 @@ sub check_agent_data
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->check_agent_data()", parameter => "agent" }});
return("!!error!!");
}
if (ref($tables) ne "ARRAY")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->check_agent_data()", parameter => "tables" }});
return("!!error!!");
}
my $schema_file = $anvil->data->{path}{directories}{scan_agents}."/".$agent."/".$agent.".sql";
my $loaded = $anvil->Database->check_for_schema({
@ -421,14 +432,16 @@ sub check_agent_data
{
# Log and register an alert. This should never happen, so we set it as a
# warning level alert.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0181", variables => {
my $variables = {
agent_name => $agent,
file => $schema_file,
}});
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0181", variables => $variables});
$anvil->Alert->register({
debug => 2,
alert_level => "warning",
message => "message_0181,!!agent_name!".$agent."!!,!!file!".$schema_file."!!",
message => "message_0181",
variables => $variables,
set_by => $agent,
});
}
@ -446,16 +459,17 @@ sub check_agent_data
if ($changed)
{
# Register an alert cleared message.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0182", variables => {
my $variables = {
agent_name => $agent,
file => $schema_file,
}});
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0182", variables => $variables});
$anvil->Alert->register({
debug => $debug,
alert_level => "warning",
clear_alert => 1,
message => "message_0182,!!agent_name!".$agent."!!,!!file!".$schema_file."!!",
message => "message_0182",
variables => $variables,
set_by => $agent,
});
}
@ -480,6 +494,7 @@ sub check_agent_data
$anvil->Database->_find_behind_databases({
debug => $debug,
source => $agent,
tables => $tables,
});
}

@ -145,6 +145,15 @@ sub agent_startup
}
}
# Append this agent's tables to 'sys::database::check_tables'. The reference type is checked
# first so that a missing or non-array 'tables' value is never dereferenced, and so that a
# valid (non-empty) array reference is actually processed.
if ((ref($tables) eq "ARRAY") && (@{$tables} > 0))
{
	# Append our tables
	push @{$anvil->data->{sys}{database}{check_tables}}, @{$tables};
}
# Connect to DBs.
$anvil->Database->connect({debug => $debug});
$anvil->Log->entry({source => $agent, line => __LINE__, level => $debug, secure => 0, key => "log_0132"});
@ -161,16 +170,11 @@ sub agent_startup
# It's possible that some agents don't have a database (or use core database tables only)
if (@{$tables} > 0)
{
# Append our tables
foreach my $table (@{$tables})
{
push @{$anvil->data->{sys}{database}{check_tables}}, $table;
}
# Make sure our schema is loaded.
$anvil->Database->check_agent_data({
debug => $debug,
agent => $agent,
debug => $debug,
agent => $agent,
tables => $tables,
});
}

@ -24,6 +24,7 @@ my $THIS_FILE = "Storage.pm";
# get_file_stats
# make_directory
# move_file
# parse_lsblk
# read_config
# read_file
# read_mode
@ -1930,6 +1931,61 @@ fi";
return(0);
}
=head2 parse_lsblk
This calls C<< lsblk >> (in json format) and parses the output. Data is stored as:
* lsblk::<target>::...
Parameters;
=head3 password (optional)
If C<< target >> is set, this is the password used to log into the remote system as the C<< remote_user >>. If it is not set, an attempt to connect without a password will be made.
=head3 port (optional, default 22)
If C<< target >> is set, this is the TCP port number used to connect to the remote machine.
=head3 remote_user (optional, default root)
If C<< target >> is set, this is the user account that will be used when connecting to the remote system.
=head3 target (optional)
If set, C<< lsblk >> is read from the target machine. This must be either an IP address or a resolvable host name.
B<< Note >>: If not set, the short host name of this system is used in C<< lsblk::<short_host_name>::... >>.
=cut
sub parse_lsblk
{
	my ($self, $parameter) = @_;
	my $anvil = $self->parent;
	
	# Log at level 3 unless the caller asked for a specific debug level.
	my $debug = 3;
	if (defined $parameter->{debug})
	{
		$debug = $parameter->{debug};
	}
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Storage->parse_lsblk()" }});
	
	# Start from the defaults, then let any defined parameter replace them.
	my $password    = "";
	my $port        = 22;
	my $remote_user = "root";
	my $target      = "";
	if (defined $parameter->{password})
	{
		$password = $parameter->{password};
	}
	if (defined $parameter->{port})
	{
		$port = $parameter->{port};
	}
	if (defined $parameter->{remote_user})
	{
		$remote_user = $parameter->{remote_user};
	}
	if (defined $parameter->{target})
	{
		$target = $parameter->{target};
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
		password    => $anvil->Log->is_secure($password),
		port        => $port,
		remote_user => $remote_user,
		target      => $target,
	}});
	
	# NOTE: The call is only assembled at this point; nothing in this method runs it or parses
	#       any output yet.
	my $shell_call = $anvil->data->{path}{exe}{lsblk}." --all --bytes --json";
	
	return(0);
}
=head2 read_config
This method is used to read 'Anvil::Tools' style configuration files. These configuration files are in the format:

477
notes

@ -1,480 +1,3 @@
BEGIN TRANSACTION;
ALTER TABLE history.recipients RENAME recipient_new_level TO recipient_level;
ALTER TABLE history.recipients DROP COLUMN recipient_units;
ALTER TABLE recipients RENAME recipient_new_level TO recipient_level;
ALTER TABLE recipients DROP COLUMN recipient_units;
ALTER TABLE alert_sent DROP COLUMN alert_name;
DROP FUNCTION history_recipients() CASCADE;
CREATE FUNCTION history_recipients() RETURNS trigger
AS $$
DECLARE
history_recipients RECORD;
BEGIN
SELECT INTO history_recipients * FROM recipients WHERE recipient_uuid = new.recipient_uuid;
INSERT INTO history.recipients
(recipient_uuid,
recipient_name,
recipient_email,
recipient_language,
recipient_level,
modified_date)
VALUES
(history_recipients.recipient_uuid,
history_recipients.recipient_name,
history_recipients.recipient_email,
history_recipients.recipient_language,
history_recipients.recipient_level,
history_recipients.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_recipients() OWNER TO admin;
CREATE TRIGGER trigger_recipients
AFTER INSERT OR UPDATE ON recipients
FOR EACH ROW EXECUTE PROCEDURE history_recipients();
ALTER TABLE alerts ADD COLUMN alert_processed integer not null default 0;
ALTER TABLE history.alerts ADD COLUMN alert_processed integer;
DROP FUNCTION history_alerts() CASCADE;
CREATE FUNCTION history_alerts() RETURNS trigger
AS $$
DECLARE
history_alerts RECORD;
BEGIN
SELECT INTO history_alerts * FROM alerts WHERE alert_uuid = new.alert_uuid;
INSERT INTO history.alerts
(alert_uuid,
alert_host_uuid,
alert_set_by,
alert_level,
alert_title,
alert_message,
alert_sort_position,
alert_show_header,
alert_processed,
modified_date)
VALUES
(history_alerts.alert_uuid,
history_alerts.alert_host_uuid,
history_alerts.alert_set_by,
history_alerts.alert_level,
history_alerts.alert_title,
history_alerts.alert_message,
history_alerts.alert_sort_position,
history_alerts.alert_show_header,
history_alerts.alert_processed,
history_alerts.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_alerts() OWNER TO admin;
CREATE TRIGGER trigger_alerts
AFTER INSERT OR UPDATE ON alerts
FOR EACH ROW EXECUTE PROCEDURE history_alerts();
-- This stores weighted health of nodes. Agents can set one or more health values. After a scan sweep
-- completes, ScanCore will sum these weights and the node with the *highest* value is considered the
-- *least* healthy and any servers on it will be migrated to the peer.
CREATE TABLE health (
health_uuid uuid primary key,
health_host_uuid uuid not null, -- The name of the node or dashboard that this health came from.
health_agent_name text not null, -- This is the scan agent (or program name) setting this score.
health_source_name text not null, -- This is the name of the problem, as set by the agent.
health_source_weight numeric not null, -- This is the numerical weight of this alert. The higher this value, the more severe the health issue is
modified_date timestamp with time zone not null,
FOREIGN KEY(health_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE health OWNER TO admin;
CREATE TABLE history.health (
history_id bigserial,
health_uuid uuid not null,
health_host_uuid uuid not null,
health_agent_name text not null,
health_source_name text not null,
health_source_weight numeric not null,
modified_date timestamp with time zone not null
);
ALTER TABLE history.health OWNER TO admin;
CREATE FUNCTION history_health() RETURNS trigger
AS $$
DECLARE
history_health RECORD;
BEGIN
SELECT INTO history_health * FROM health WHERE health_uuid = new.health_uuid;
INSERT INTO history.health
(health_uuid,
health_host_uuid,
health_agent_name,
health_source_name,
health_source_weight,
modified_date)
VALUES
(history_health.health_uuid,
history_health.health_host_uuid,
history_health.health_agent_name,
history_health.health_source_name,
history_health.health_source_weight,
history_health.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_health() OWNER TO admin;
CREATE TRIGGER trigger_health
AFTER INSERT OR UPDATE ON health
FOR EACH ROW EXECUTE PROCEDURE history_health();
CREATE TABLE power (
power_uuid uuid primary key,
power_host_uuid uuid not null, -- The name of the node or dashboard that this power came from.
power_ups_uuid uuid not null, -- This is the 'upses' -> 'ups_uuid' of the UPS. This is used to map what UPSes are powering a given node.
power_agent_name text not null, -- This is the name of the scan agent that wrote a given entry
power_on_battery boolean not null, -- TRUE == use "time_remaining" to determine if graceful power off is needed. FALSE == power loss NOT imminent, do not power off node.
power_seconds_left numeric, -- Should always be set, but not required *EXCEPT* when 'power_on_battery' is TRUE.
power_charge_percentage numeric, -- Percentage charge in the UPS. Used to determine when the dashboard should boot the node after AC restore
modified_date timestamp with time zone not null,
FOREIGN KEY(power_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(power_ups_uuid) REFERENCES upses(ups_uuid)
);
ALTER TABLE power OWNER TO admin;
CREATE TABLE history.power (
history_id bigserial,
power_uuid uuid,
power_host_uuid uuid,
power_ups_uuid uuid,
power_agent_name text,
power_on_battery boolean,
power_seconds_left numeric,
power_charge_percentage numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.power OWNER TO admin;
CREATE FUNCTION history_power() RETURNS trigger
AS $$
DECLARE
history_power RECORD;
BEGIN
SELECT INTO history_power * FROM power WHERE power_uuid = new.power_uuid;
INSERT INTO history.power
(power_uuid,
power_host_uuid,
power_ups_uuid,
power_agent_name,
power_on_battery,
power_seconds_left,
power_charge_percentage,
modified_date)
VALUES
(history_power.power_uuid,
history_power.power_host_uuid,
history_power.power_ups_uuid,
history_power.power_agent_name,
history_power.power_on_battery,
history_power.power_seconds_left,
history_power.power_charge_percentage,
history_power.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_power() OWNER TO admin;
CREATE TRIGGER trigger_power
AFTER INSERT OR UPDATE ON power
FOR EACH ROW EXECUTE PROCEDURE history_power();
-- This stores temperature information for a given host. ScanCore checks this data to decide if action needs
-- to be taken during a thermal event. On nodes, this is used to decide if a node should be shed or if an
-- Anvil! needs to be stopped entirely. On dashboards, this is used to check if/when it is safe to restart a
-- node that shut down because of a thermal event.
CREATE TABLE temperature (
temperature_uuid uuid primary key,
temperature_host_uuid uuid not null, -- This is the hosts -> host_uuid of the node or dashboard that this temperature came from.
temperature_agent_name text not null, -- This is the name of the agent that set the alert
temperature_sensor_host text not null, -- This is the host (uuid) that the sensor was read from. This is important as ScanCore on a striker will read available thermal data from a node using its IPMI data.
temperature_sensor_name text not null, -- This is the name of the sensor reporting the temperature
temperature_celsius numeric not null, -- This is the actual temperature, in Celsius.
temperature_state text not null, -- This is a string representing the state of the sensor. Valid values are 'ok', 'warning', and 'critical'
temperature_is text not null, -- This indicates if the temperature is 'nominal', 'high' or 'low'.
modified_date timestamp with time zone not null,
FOREIGN KEY(temperature_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE temperature OWNER TO admin;
CREATE TABLE history.temperature (
history_id bigserial,
temperature_uuid uuid not null,
temperature_host_uuid uuid not null,
temperature_agent_name text not null,
temperature_sensor_host text not null,
temperature_sensor_name text not null,
temperature_celsius numeric not null,
temperature_state text not null,
temperature_is text not null,
modified_date timestamp with time zone not null
);
ALTER TABLE history.temperature OWNER TO admin;
CREATE FUNCTION history_temperature() RETURNS trigger
AS $$
DECLARE
history_temperature RECORD;
BEGIN
SELECT INTO history_temperature * FROM temperature WHERE temperature_uuid = new.temperature_uuid;
INSERT INTO history.temperature
(temperature_uuid,
temperature_host_uuid,
temperature_agent_name,
temperature_sensor_host,
temperature_sensor_name,
temperature_celsius,
temperature_state,
temperature_is,
modified_date)
VALUES
(history_temperature.temperature_uuid,
history_temperature.temperature_host_uuid,
history_temperature.temperature_agent_name,
history_temperature.temperature_sensor_host,
history_temperature.temperature_sensor_name,
history_temperature.temperature_celsius,
history_temperature.temperature_state,
history_temperature.temperature_is,
history_temperature.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_temperature() OWNER TO admin;
CREATE TRIGGER trigger_temperature
AFTER INSERT OR UPDATE ON temperature
FOR EACH ROW EXECUTE PROCEDURE history_temperature();
DROP FUNCTION history_definitions() CASCADE;
DROP TABLE history.definitions;
DROP TABLE definitions;
DROP FUNCTION history_servers() CASCADE;
DROP TABLE history.servers;
DROP TABLE servers;
-- This records each server: the Anvil! it lives on, the host it currently runs on, its boot ordering, its
-- migration settings and its RAM usage.
CREATE TABLE servers (
server_uuid uuid not null primary key,
server_name text not null, -- This is the server's name. It can change without re-uploading the server.
server_anvil_uuid uuid not null, -- This is the Anvil! system that the server lives on. It can move to another Anvil!, so this can change.
server_user_stop boolean not null default FALSE, -- When set, the server was stopped by a user. The Anvil! will not start a server that has been cleanly stopped.
server_start_after_server_uuid uuid, -- This can be the server_uuid of another server. If set, this server will boot 'server_start_delay' seconds after the referenced server boots. A value of '00000000-0000-0000-0000-000000000000' will tell 'anvil-safe-start' to not boot the server at all. If a server is set not to start, any dependent servers will also stay off.
server_start_delay integer not null default 0, -- The delay, in seconds, used with 'server_start_after_server_uuid' (see above).
server_host_uuid uuid, -- This is the current hosts -> host_uuid for this server. If the server is off, this will be blank.
server_state text not null, -- This is the current state of this server, as reported by 'virsh list --all' (see: man virsh -> GENERIC COMMANDS -> --list)
server_live_migration boolean not null default TRUE, -- When false, servers will be frozen for a migration, instead of being migrated while the server is still running. During a cold migration, the server will be unresponsive, so connections to it could time out. However, by being frozen the migration will complete faster.
server_pre_migration_file_uuid uuid, -- This is set to the files -> file_uuid of a script to run BEFORE migrating a server. If the file isn't found or can't run, the script is ignored.
server_pre_migration_arguments text not null, -- These are arguments to pass to the pre-migration script
server_post_migration_file_uuid uuid, -- This is set to the files -> file_uuid of a script to run AFTER migrating a server. If the file isn't found or can't run, the script is ignored.
server_post_migration_arguments text not null, -- These are arguments to pass to the post-migration script
server_ram_in_use numeric not null, -- This is the amount of RAM currently used by the server. If the server is off, then this is the amount of RAM last used when the server was running.
server_configured_ram numeric not null, -- This is the amount of RAM allocated to the server in the on-disk definition file. This should always match the value above, but allows us to track when a user manually updated the allocated RAM in the on-disk definition, but that hasn't yet been picked up by the server
server_updated_by_user numeric not null, -- This is set to a unix timestamp when the user last updated the definition (via striker). When set, scan-server will check this value against the age of the definition file on disk. If this is newer, the on-disk definition will be updated. On the host with the server (if any), the new definition will be loaded into virsh as well.
server_boot_time numeric not null, -- This is the unix time (since epoch) when the server booted. It is calculated by checking the 'ps -p <pid> -o etimes=' when a server is seen to be running when it had last been seen as off. If a server that had been running is seen to be off, this is set back to 0.
modified_date timestamp with time zone not null,
FOREIGN KEY(server_anvil_uuid) REFERENCES anvils(anvil_uuid),
FOREIGN KEY(server_start_after_server_uuid) REFERENCES servers(server_uuid),
FOREIGN KEY(server_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(server_pre_migration_file_uuid) REFERENCES files(file_uuid),
FOREIGN KEY(server_post_migration_file_uuid) REFERENCES files(file_uuid)
);
ALTER TABLE servers OWNER TO admin;
CREATE TABLE history.servers (
history_id bigserial,
server_uuid uuid,
server_name text,
server_anvil_uuid uuid,
server_user_stop boolean,
server_start_after_server_uuid uuid,
server_start_delay integer,
server_host_uuid uuid,
server_state text,
server_live_migration boolean,
server_pre_migration_file_uuid uuid,
server_pre_migration_arguments text,
server_post_migration_file_uuid uuid,
server_post_migration_arguments text,
server_ram_in_use numeric,
server_configured_ram numeric,
server_updated_by_user numeric,
server_boot_time numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.servers OWNER TO admin;
CREATE FUNCTION history_servers() RETURNS trigger
AS $$
DECLARE
history_servers RECORD;
BEGIN
SELECT INTO history_servers * FROM servers WHERE server_uuid = new.server_uuid;
INSERT INTO history.servers
(server_uuid,
server_name,
server_anvil_uuid,
server_user_stop,
server_start_after_server_uuid,
server_start_delay,
server_host_uuid,
server_state,
server_live_migration,
server_pre_migration_file_uuid,
server_pre_migration_arguments,
server_post_migration_file_uuid,
server_post_migration_arguments,
server_ram_in_use,
server_configured_ram,
server_updated_by_user,
server_boot_time,
modified_date)
VALUES
(history_servers.server_uuid,
history_servers.server_name,
history_servers.server_anvil_uuid,
history_servers.server_user_stop,
history_servers.server_start_after_server_uuid,
history_servers.server_start_delay,
history_servers.server_host_uuid,
history_servers.server_state,
history_servers.server_live_migration,
history_servers.server_pre_migration_file_uuid,
history_servers.server_pre_migration_arguments,
history_servers.server_post_migration_file_uuid,
history_servers.server_post_migration_arguments,
history_servers.server_ram_in_use,
history_servers.server_configured_ram,
history_servers.server_updated_by_user,
history_servers.server_boot_time,
history_servers.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_servers() OWNER TO admin;
CREATE TRIGGER trigger_servers
AFTER INSERT OR UPDATE ON servers
FOR EACH ROW EXECUTE PROCEDURE history_servers();
-- This stores the XML definition for a server. Whenever a server_definition is found missing on a node or DR host,
-- it will be rewritten from here. If this copy changes, it will be updated on the hosts.
CREATE TABLE server_definitions (
server_definition_uuid uuid not null primary key,
server_definition_server_uuid uuid not null, -- This is the servers -> server_uuid of the server
server_definition_xml text not null, -- This is the XML body.
modified_date timestamp with time zone not null,
FOREIGN KEY(server_definition_server_uuid) REFERENCES servers(server_uuid)
);
ALTER TABLE server_definitions OWNER TO admin;
CREATE TABLE history.server_definitions (
history_id bigserial,
server_definition_uuid uuid,
server_definition_server_uuid uuid,
server_definition_xml text,
modified_date timestamp with time zone not null
);
ALTER TABLE history.server_definitions OWNER TO admin;
CREATE FUNCTION history_server_definitions() RETURNS trigger
AS $$
DECLARE
history_server_definitions RECORD;
BEGIN
SELECT INTO history_server_definitions * FROM server_definitions WHERE server_definition_uuid = new.server_definition_uuid;
INSERT INTO history.server_definitions
(server_definition_uuid,
server_definition_server_uuid,
server_definition_xml,
modified_date)
VALUES
(history_server_definitions.server_definition_uuid,
history_server_definitions.server_definition_server_uuid,
history_server_definitions.server_definition_xml,
history_server_definitions.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_server_definitions() OWNER TO admin;
CREATE TRIGGER trigger_server_definitions
AFTER INSERT OR UPDATE ON server_definitions
FOR EACH ROW EXECUTE PROCEDURE history_server_definitions();
DROP TABLE updated;
-- This records, per host, which agent made an update and when.
CREATE TABLE updated (
updated_uuid uuid not null primary key,
updated_host_uuid uuid not null, -- This is the hosts -> host_uuid of the host the update was made on.
updated_by text not null, -- The name of the agent (or 'ScanCore' itself) that updated.
modified_date timestamp with time zone not null,
FOREIGN KEY(updated_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE updated OWNER TO admin;
DROP FUNCTION history_anvils() CASCADE;
CREATE FUNCTION history_anvils() RETURNS trigger
AS $$
DECLARE
history_anvils RECORD;
BEGIN
SELECT INTO history_anvils * FROM anvils WHERE anvil_uuid = new.anvil_uuid;
INSERT INTO history.anvils
(anvil_uuid,
anvil_name,
anvil_description,
anvil_password,
anvil_node1_host_uuid,
anvil_node2_host_uuid,
anvil_dr1_host_uuid,
anvil_fencing_enabled,
modified_date)
VALUES
(history_anvils.anvil_uuid,
history_anvils.anvil_name,
history_anvils.anvil_description,
history_anvils.anvil_password,
history_anvils.anvil_node1_host_uuid,
history_anvils.anvil_node2_host_uuid,
history_anvils.anvil_dr1_host_uuid,
history_anvils.anvil_fencing_enabled,
history_anvils.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_anvils() OWNER TO admin;
CREATE TRIGGER trigger_anvils
AFTER INSERT OR UPDATE ON anvils
FOR EACH ROW EXECUTE PROCEDURE history_anvils();
COMMIT;
============
From: test-alert@alert.alteeve.com

@ -0,0 +1,464 @@
#!/usr/bin/perl
#
# This scans the nodes and DR host for DRBD resources and their states.
#
# NOTE: The data stored here is not bound to a given host. As such, only hosted VMs are processed.
#
# Examples;
#
# Exit codes;
# 0 = Normal exit.
# 1 = Startup failure (not running as root, no DB, bad file read, etc)
# 2 = DRBD not found or configured.
#
# TODO:
# -
#
use strict;
use warnings;
use Anvil::Tools;
use Data::Dumper;
# Disable buffering
$| = 1;
# Prevent a discrepancy between UID/GID and EUID/EGID from throwing an error.
$< = $>;
$( = $);
# Work out this program's file name, and the directory it was called from, using the path in '$0'.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
# If the directory is relative (it starts with '.'), swap the leading '.' for the current working directory.
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Get the Anvil::Tools handle.
# NOTE(review): The log level and secure logging are passed to the constructor and then set again
#               explicitly below; presumably one of the two is redundant - confirm.
my $anvil = Anvil::Tools->new({log_level => 2, log_secure => 1});
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
# Locations of the DRBD resource directory under debugfs and of the DRBD configuration directory.
$anvil->data->{'scan-drbd'} = {
resource_status => "/sys/kernel/debug/drbd/resources",
config_directory => "/etc/drbd.d",
};
# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
# Not root
print $anvil->Words->string({key => "error_0005"})."\n";
$anvil->nice_exit({exit_code => 1});
}
# Set defaults before the config file and switches are read; presumably so that either can override
# them - confirm against Storage->read_config() and Get->switches().
$anvil->data->{scancore}{'scan-drbd'}{disable} = 0;
$anvil->data->{scancore}{'scan-drbd'}{'auto-undefine'} = 1;
$anvil->data->{switches}{force} = 0;
$anvil->Storage->read_config();
# Read switches
$anvil->Get->switches;
# If we're disabled and '--force' wasn't used, exit.
if (($anvil->data->{scancore}{'scan-drbd'}{disable}) && (not $anvil->data->{switches}{force}))
{
# Exit.
$anvil->nice_exit({exit_code => 0});
}
# If the 'purge' switch is set, purge the tables named in this agent's schema file and then exit.
if ($anvil->data->{switches}{purge})
{
# This can be called when doing bulk-database purges.
my $schema_file = $anvil->data->{path}{directories}{scan_agents}."/".$THIS_FILE."/".$THIS_FILE.".sql";
$anvil->Database->purge_data({
debug => 2,
tables => $anvil->Database->get_tables_from_schema({schema_file => $schema_file}),
});
$anvil->nice_exit({exit_code => 0});
}
# Handle start-up tasks
my $problem = $anvil->ScanCore->agent_startup({agent => $THIS_FILE});
if ($problem)
{
# Start-up failed, exit with the 'startup failure' code.
$anvil->nice_exit({exit_code => 1});
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_drbd_log_0001", variables => { program => $THIS_FILE }});
# Collect the DRBD data. A false return is treated as DRBD not being available on this host.
if (not gather_data($anvil))
{
# DRBD not found or configured.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "scan_drbd_error_0001"});
$anvil->nice_exit({exit_code => 2});
}
# Update the database
$anvil->Database->insert_or_update_updated({updated_by => $THIS_FILE});
# Clean up and go away.
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
sub gather_data
{
my ($anvil) = @_;
if ((not -e $anvil->data->{path}{exe}{drbdadm}) or ($anvil->Get->host_type eq "striker"))
{
# DRBD isn't installed or this is a striker node.
return(0);
}
# Parse drbdadm
my ($drbd_xml, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." dump-xml"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { drbd_xml => $drbd_xml, return_code => $return_code }});
if ($return_code)
{
# Failed to dump the XML.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_drbd_error_0002", variables => { return_code => $return_code }});
return(0);
}
else
{
local $@;
my $dom = eval { XML::LibXML->load_xml(string => $drbd_xml); };
if ($@)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_drbd_error_0003", variables => {
xml => $drbd_xml,
error => $@,
}});
return(0);
}
else
{
# Successful parse!
### TODO: Might be best to config these default values by calling/parsing
### 'drbdsetup show <resource> --show-defaults'.
$anvil->data->{new}{scan_drbd}{scan_drbd_host_uuid} = $anvil->Get->host_uuid;
$anvil->data->{new}{scan_drbd}{scan_drbd_common_xml} = $drbd_xml;
$anvil->data->{new}{scan_drbd}{scan_drbd_flush_disk} = 1;
$anvil->data->{new}{scan_drbd}{scan_drbd_flush_md} = 1;
$anvil->data->{new}{scan_drbd}{scan_drbd_timeout} = 6; # Default is '60', 6 seconds
$anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed} = 0;
foreach my $name ($dom->findnodes('/config/common/section'))
{
my $section = $name->{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { section => $section }});
foreach my $option_name ($name->findnodes('./option'))
{
my $variable = $option_name->{name};
my $value = $option_name->{value};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
's1:variable' => $variable,
's2:value' => $value,
}});
if ($section eq "net")
{
if ($variable eq "timeout")
{
$value /= 10;
$anvil->data->{new}{scan_drbd}{scan_drbd_timeout} = ($value / 10);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::scan_drbd::scan_drbd_timeout" => $anvil->data->{new}{scan_drbd}{scan_drbd_timeout},
}});
}
}
if ($section eq "disk")
{
if ($variable eq "disk-flushes")
{
$anvil->data->{new}{scan_drbd}{scan_drbd_flush_disk} = $value eq "no" ? 0 : 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::scan_drbd::scan_drbd_flush_disk" => $anvil->data->{new}{scan_drbd}{scan_drbd_flush_disk},
}});
}
if ($variable eq "md-flushes")
{
$anvil->data->{new}{scan_drbd}{scan_drbd_flush_md} = $value eq "no" ? 0 : 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::scan_drbd::scan_drbd_flush_md" => $anvil->data->{new}{scan_drbd}{scan_drbd_flush_md},
}});
}
}
}
}
foreach my $name ($dom->findnodes('/config/resource'))
{
my $resource = $name->{name};
my $conf_file = $name->{'conf-file-line'};
$conf_file =~ s/:\d+$//;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:resource' => $resource,
's2:conf_file' => $conf_file,
}});
$anvil->data->{new}{resource}{$resource}{xml} = $name;
$anvil->data->{new}{resource}{$resource}{up} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"new::resource::${resource}::xml" => $anvil->data->{new}{resource}{$resource}{xml},
}});
foreach my $host ($name->findnodes('./host'))
{
my $this_host_name = $host->{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { this_host_name => $this_host_name }});
next if (($this_host_name ne $anvil->Get->host_name) && ($this_host_name ne $anvil->Get->short_host_name));
foreach my $volume_vnr ($host->findnodes('./volume'))
{
my $volume = $volume_vnr->{vnr};
my $meta_disk = $volume_vnr->findvalue('./meta-disk');
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:volume' => $volume,
's2:meta_disk' => $meta_disk,
}});
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path} = $volume_vnr->findvalue('./device');
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor} = $volume_vnr->findvalue('./device/@minor');
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{size} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:new::resource::${resource}::volume::${volume}::device_path" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path},
"s2:new::resource::${resource}::volume::${volume}::device_minor" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor},
}});
}
}
foreach my $connection ($name->findnodes('./connection'))
{
my $peer = "";
foreach my $host ($connection->findnodes('./host'))
{
my $this_host_name = $host->{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { this_host_name => $this_host_name }});
next if (($this_host_name eq $anvil->Get->host_name) or ($this_host_name eq $anvil->Get->short_host_name));
$peer = $this_host_name;
$anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address} = $host->findvalue('./address');
$anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port} = $host->findvalue('./address/@port');;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:new::resource::${resource}::peer::${peer}::peer_ip_address" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{peer_ip_address},
"s2:new::resource::${resource}::peer::${peer}::tcp_port" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{tcp_port},
}});
# Setup some default values.
$anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol} = "unknown";
$anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing} = "unknown";
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{volume}})
{
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{connection_state} = "disconnected";
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_disk_state} = "down";
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_disk_state} = "unknown";
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_role} = "down";
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_role} = "unknown";
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{out_of_sync_size} = -1;
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{replication_speed} = 0;
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync} = 0;
}
}
foreach my $name ($connection->findnodes('./section'))
{
my $section = $name->{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { section => $section }});
foreach my $option_name ($name->findnodes('./option'))
{
my $variable = $option_name->{name};
my $value = $option_name->{value};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:variable' => $variable,
's2:value' => $value,
}});
if ($section eq "net")
{
if ($variable eq "protocol")
{
$anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol} = $value;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::peer::${peer}::protocol" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{protocol},
}});
}
if ($variable eq "fencing")
{
$anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing} = $value;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::peer::${peer}::fencing" => $anvil->data->{new}{resource}{$resource}{peer}{$peer}{fencing},
}});
}
}
}
}
}
}
}
}
local(*DIRECTORY);
opendir(DIRECTORY, $anvil->data->{'scan-drbd'}{resource_status});
while(my $file = readdir(DIRECTORY))
{
next if $file eq ".";
next if $file eq "..";
my $full_path = $anvil->data->{'scan-drbd'}{resource_status}."/".$file;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { full_path => $full_path }});
if (-d $full_path)
{
my $resource = $file;
$anvil->data->{new}{resource}{$resource}{up} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::up" => $anvil->data->{new}{resource}{$resource}{up},
}});
}
}
closedir(DIRECTORY);
#print "Sync progress:\n";
#print " ".sprintf("%-${longest_resource}s", "Res")." ".sprintf("%-${longest_connection}s", "To")." Vol\n";
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::up" => $anvil->data->{new}{resource}{$resource}{up},
}});
# If the resource isn't up, there won't be a proc file to read.
next if not $anvil->data->{new}{resource}{$resource}{up};
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{volume}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volume => $volume }});
foreach my $peer (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}})
{
my $proc_file = $anvil->data->{'scan-drbd'}{resource_status}."/".$resource."/connections/".$peer."/".$volume."/proc_drbd";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { proc_file => $proc_file }});
my $file_body = $anvil->Storage->read_file({file => $proc_file});
my $progress = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_body => $file_body }});
foreach my $line (split/\n/, $file_body)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /cs:(.*?) /)
{
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{connection_state} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::volume::${volume}::peer::${peer}::connection_state" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{connection_state},
}});
}
if ($line =~ /ro:(.*?)\/(.*?) /)
{
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_role} = lc($1);
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_role} = lc($2);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::volume::${volume}::peer::${peer}::local_role" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_role},
"new::resource::${resource}::volume::${volume}::peer::${peer}::peer_role" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_role},
}});
# If the peer is secondary, read the device size.
if ($anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_role} eq "secondary")
{
# Get the size of the DRBD device.
my ($size, $return_code) = $anvil->System->call({secure => 1, shell_call => $anvil->data->{path}{exe}{blockdev}." --getsize64 /dev/drbd".$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
size => $size,
return_code => $return_code,
}});
if (not $return_code)
{
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{size} = $size;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::volume::${volume}::size" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{size}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{size}}).")",
}});
}
}
}
if ($line =~ /ds:(.*?)\/(.*?) /)
{
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_disk_state} = $1;
$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_disk_state} = $2;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"new::resource::${resource}::volume::${volume}::peer::${peer}::local_disk_state" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{local_disk_state},
"new::resource::${resource}::volume::${volume}::peer::${peer}::peer_disk_state" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{peer_disk_state},
}});
}
if ($line =~ /oos:(\d+)/)
{
	# The 'oos' (out of sync) counter is reported in KiB, but 'out_of_sync_size' is recorded 
	# in bytes, so convert before storing.
	$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{out_of_sync_size} = ($1 * 1024);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"new::resource::${resource}::volume::${volume}::peer::${peer}::out_of_sync_size" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{out_of_sync_size},
	}});
}
=cut
0: cs:Established ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r-----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:[0;0] ua:0 ap:[0;0] ep:1 wo:1 oos:0
resync: used:0/61 hits:0 misses:0 starving:0 locked:0 changed:0
act_log: used:0/1237 hits:0 misses:0 starving:0 locked:0 changed:0
blocked on activity log: 0/0/0
0: cs:SyncTarget ro:Secondary/Primary ds:Inconsistent/UpToDate C r-----
ns:0 nr:648960 dw:648728 dr:0 al:0 bm:0 lo:4 pe:[0;1] ua:4 ap:[0;0] ep:1 wo:1 oos:20321476
[>....................] sync'ed: 3.2% (19844/20476)M
finish: 0:03:39 speed: 92,672 (92,936 -- 92,672) want: 2,880 K/sec
3% sector pos: 1298032/41940408
resync: used:1/61 hits:31926 misses:10 starving:0 locked:0 changed:5
act_log: used:0/1237 hits:0 misses:0 starving:0 locked:0 changed:0
blocked on activity log: 0/0/0
=cut
if ($line =~ /sync'ed: (.*?\%)/)
{
$progress .= $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { progress => $progress }});
}
if ($line =~ /speed: (.*?) \(/)
{
	# The speed is printed in K/sec with thousands separators (ie: '92,672'). Strip the commas 
	# before doing math, otherwise perl only uses the digits ahead of the first comma (and 
	# warns about a non-numeric argument).
	my $speed =  $1;
	   $speed =~ s/,//g;
	$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{replication_speed} =  ($speed * 1024);
	$anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed}                                += $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{replication_speed};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"s1:new::resource::${resource}::volume::${volume}::peer::${peer}::replication_speed" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{replication_speed}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{replication_speed}}).")",
		"s2:new::scan_drbd::scan_drbd_total_sync_speed" => $anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed}}).")",
	}});
}
if ($line =~ /finish: (\d+):(\d+):(\d+) /)
{
	# The estimated time to finish is printed as 'h:mm:ss'.
	my $hours   = $1;
	my $minutes = $2;
	my $seconds = $3;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		's1:hours'   => $hours,
		's2:minutes' => $minutes,
		's3:seconds' => $seconds,
	}});
	# Convert to seconds. This must be multiplication; '**' is exponentiation in perl and 
	# would produce nonsense (ie: 3 ** 60) instead of the number of seconds.
	$anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync} = (($hours * 3600) + ($minutes * 60) + $seconds);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"new::resource::${resource}::volume::${volume}::peer::${peer}::estimated_time_to_sync" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync}." (".$anvil->Convert->time({'time' => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync}, long => 1}).")",
	}});
}
}
}
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s2:new::scan_drbd::scan_drbd_total_sync_speed" => $anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed}." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{new}{scan_drbd}{scan_drbd_total_sync_speed}}).")",
}});
return(1);
}

@ -0,0 +1,357 @@
-- This is the database schema for the 'scan-drbd' Scan Agent.
-- 
-- The 'scan_drbd' table holds the DRBD settings that apply to the host as a whole (taken from the <common> 
-- section of the config) along with the total resync speed seen on the host.
CREATE TABLE scan_drbd (
scan_drbd_uuid uuid not null primary key,
scan_drbd_host_uuid uuid not null, -- The host (hosts -> host_uuid) this record belongs to.
scan_drbd_common_xml text not null, -- This is the raw <common> section of 'drbdadm dump-xml'.
scan_drbd_flush_disk boolean not null, -- Set to true when disk flushes are enabled (only safe to be false when FBWC is used)
scan_drbd_flush_md boolean not null, -- Set to true when meta-data flushes are enabled (only safe to be false when FBWC is used)
scan_drbd_timeout numeric not null, -- This is how long we'll wait for a response from a peer (in seconds) before declaring it lost.
scan_drbd_total_sync_speed numeric not null, -- This is the current total sync speed across all resync'ing volumes
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_drbd_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE scan_drbd OWNER TO admin;
-- History copy of 'scan_drbd'. It has the same columns as the public table (without the constraints), plus 
-- 'history_id' to give each recorded version its own sequence number.
CREATE TABLE history.scan_drbd (
history_id bigserial,
scan_drbd_uuid uuid,
scan_drbd_host_uuid uuid,
scan_drbd_common_xml text,
scan_drbd_flush_disk boolean,
scan_drbd_flush_md boolean,
scan_drbd_timeout numeric,
scan_drbd_total_sync_speed numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.scan_drbd OWNER TO admin;
-- Trigger function; re-reads the 'scan_drbd' row that was just inserted or updated (matched on 
-- 'scan_drbd_uuid') and copies it into 'history.scan_drbd'.
CREATE FUNCTION history_scan_drbd() RETURNS trigger
AS $$
DECLARE
history_scan_drbd RECORD;
BEGIN
SELECT INTO history_scan_drbd * FROM scan_drbd WHERE scan_drbd_uuid=new.scan_drbd_uuid;
INSERT INTO history.scan_drbd
(scan_drbd_uuid,
scan_drbd_host_uuid,
scan_drbd_common_xml,
scan_drbd_flush_disk,
scan_drbd_flush_md,
scan_drbd_timeout,
scan_drbd_total_sync_speed,
modified_date)
VALUES
(history_scan_drbd.scan_drbd_uuid,
history_scan_drbd.scan_drbd_host_uuid,
history_scan_drbd.scan_drbd_common_xml,
history_scan_drbd.scan_drbd_flush_disk,
history_scan_drbd.scan_drbd_flush_md,
history_scan_drbd.scan_drbd_timeout,
history_scan_drbd.scan_drbd_total_sync_speed,
history_scan_drbd.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_drbd() OWNER TO admin;
-- Record a history entry after every insert into or update of 'scan_drbd'.
CREATE TRIGGER trigger_scan_drbd
AFTER INSERT OR UPDATE ON scan_drbd
FOR EACH ROW EXECUTE PROCEDURE history_scan_drbd();
-- This is mostly an anchor for the connections and volumes table
CREATE TABLE scan_drbd_resources (
scan_drbd_resource_uuid uuid not null primary key,
scan_drbd_resource_host_uuid uuid not null, -- The host (hosts -> host_uuid) this record belongs to.
scan_drbd_resource_name text not null, -- The name of the resource.
scan_drbd_resource_xml text not null, -- This is the raw <resource> section of 'drbdadm dump-xml'.
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_drbd_resource_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE scan_drbd_resources OWNER TO admin;
-- History copy of 'scan_drbd_resources'. It has the same columns as the public table (without the 
-- constraints), plus 'history_id' to give each recorded version its own sequence number.
CREATE TABLE history.scan_drbd_resources (
history_id bigserial,
scan_drbd_resource_uuid uuid,
scan_drbd_resource_host_uuid uuid,
scan_drbd_resource_name text,
scan_drbd_resource_xml text,
modified_date timestamp with time zone not null
);
ALTER TABLE history.scan_drbd_resources OWNER TO admin;
-- Trigger function; re-reads the 'scan_drbd_resources' row that was just inserted or updated (matched on 
-- 'scan_drbd_resource_uuid') and copies it into 'history.scan_drbd_resources'.
CREATE FUNCTION history_scan_drbd_resources() RETURNS trigger
AS $$
DECLARE
history_scan_drbd_resources RECORD;
BEGIN
SELECT INTO history_scan_drbd_resources * FROM scan_drbd_resources WHERE scan_drbd_resource_uuid=new.scan_drbd_resource_uuid;
INSERT INTO history.scan_drbd_resources
(scan_drbd_resource_uuid,
scan_drbd_resource_host_uuid,
scan_drbd_resource_name,
scan_drbd_resource_xml,
modified_date)
VALUES
(history_scan_drbd_resources.scan_drbd_resource_uuid,
history_scan_drbd_resources.scan_drbd_resource_host_uuid,
history_scan_drbd_resources.scan_drbd_resource_name,
history_scan_drbd_resources.scan_drbd_resource_xml,
history_scan_drbd_resources.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_drbd_resources() OWNER TO admin;
-- Record a history entry after every insert into or update of 'scan_drbd_resources'.
CREATE TRIGGER trigger_scan_drbd_resources
AFTER INSERT OR UPDATE ON scan_drbd_resources
FOR EACH ROW EXECUTE PROCEDURE history_scan_drbd_resources();
-- Volumes under resources.
--
-- Disk States;
-- Diskless - No local block device has been assigned to the DRBD driver. This may mean that the resource
-- has never attached to its backing device, that it has been manually detached using drbdadm
-- detach, or that it automatically detached after a lower-level I/O error.
-- Inconsistent - The data is inconsistent. This status occurs immediately upon creation of a new resource,
-- on both nodes (before the initial full sync). Also, this status is found in one node (the
-- synchronization target) during synchronization.
-- Outdated - Resource data is consistent, but outdated.
-- DUnknown - This state is used for the peer disk if no network connection is available.
-- Consistent - Consistent data of a node without connection. When the connection is established, it is
-- decided whether the data is UpToDate or Outdated.
-- UpToDate - Consistent, up-to-date state of the data. This is the normal state
--
-- NOTE: Transient states are not recorded, but are listed below for completeness' sake.
-- Attaching - Transient state while reading meta data.
-- Detaching - Transient state while detaching and waiting for ongoing IOs to complete.
-- Failed - Transient state following an I/O failure report by the local block device. Next state:
-- Diskless.
-- Negotiating - Transient state when an Attach is carried out on an already-Connected DRBD device.
--
-- Resource Roles;
-- Primary - The resource is currently in the primary role, and may be read from and written to. This role
-- only occurs on one of the two nodes, unless dual-primary mode is enabled.
-- Secondary - The resource is currently in the secondary role. It normally receives updates from its peer
-- (unless running in disconnected mode), but may neither be read from nor written to. This role
-- may occur on one or both nodes.
-- Unknown - The resource’s role is currently unknown. The local resource role never has this status. It is
-- only displayed for the peer’s resource role, and only in disconnected mode.
--
-- Replication states;
-- Off - The volume is not replicated over this connection, since the connection is not Connected.
-- Established - All writes to that volume are replicated online. This is the normal state.
-- StartingSyncS - Full synchronization, initiated by the administrator, is just starting. The next possible
-- states are: SyncSource or PausedSyncS.
-- StartingSyncT - Full synchronization, initiated by the administrator, is just starting. Next state:
-- WFSyncUUID.
-- WFBitMapS - Partial synchronization is just starting. Next possible states: SyncSource or PausedSyncS.
-- WFBitMapT - Partial synchronization is just starting. Next possible state: WFSyncUUID.
-- WFSyncUUID - Synchronization is about to begin. Next possible states: SyncTarget or PausedSyncT.
-- SyncSource - Synchronization is currently running, with the local node being the source of
-- synchronization.
-- SyncTarget - Synchronization is currently running, with the local node being the target of
-- synchronization.
-- PausedSyncS - The local node is the source of an ongoing synchronization, but synchronization is
-- currently paused. This may be due to a dependency on the completion of another
-- synchronization process, or due to synchronization having been manually interrupted by
-- drbdadm pause-sync.
-- PausedSyncT - The local node is the target of an ongoing synchronization, but synchronization is
-- currently paused. This may be due to a dependency on the completion of another
-- synchronization process, or due to synchronization having been manually interrupted by
-- drbdadm pause-sync.
-- VerifyS - On-line device verification is currently running, with the local node being the source of
-- verification.
-- VerifyT - On-line device verification is currently running, with the local node being the target of
-- verification.
-- Ahead - Data replication was suspended, since the link can not cope with the load. This state is
-- enabled by the configuration on-congestion option (see Configuring congestion policies and
-- suspended replication).
-- Behind - Data replication was suspended by the peer, since the link can not cope with the load.
-- This state is enabled by the configuration on-congestion option on the peer node (see
-- Configuring congestion policies and suspended replication).
--
-- Connection States;
--
-- StandAlone - No network configuration available. The resource has not yet been connected, or has been administratively disconnected (using drbdadm disconnect), or has dropped its connection due to failed authentication or split brain.
-- Connecting - This node is waiting until the peer node becomes visible on the network.
-- Connected - A DRBD connection has been established, data mirroring is now active. This is the normal state.
--
-- NOTE: Temporary states are not recorded, but are listed below for completeness' sake.
-- Disconnecting - Temporary state during disconnection. The next state is StandAlone.
-- Unconnected - Temporary state, prior to a connection attempt. Possible next states: Connecting.
-- Timeout - Temporary state following a timeout in the communication with the peer. Next state:
-- Unconnected.
-- BrokenPipe - Temporary state after the connection to the peer was lost. Next state: Unconnected.
-- NetworkFailure - Temporary state after the connection to the partner was lost. Next state: Unconnected.
-- ProtocolError - Temporary state after the connection to the partner was lost. Next state: Unconnected.
-- TearDown - Temporary state. The peer is closing the connection. Next state: Unconnected.
-- NOTE: This table stores the information about this volume on the local host.
CREATE TABLE scan_drbd_volumes (
scan_drbd_volume_uuid uuid not null primary key,
scan_drbd_volume_host_uuid uuid not null, -- The host (hosts -> host_uuid) this record belongs to.
scan_drbd_volume_scan_drbd_resource_uuid uuid not null, -- The resource (scan_drbd_resources -> scan_drbd_resource_uuid) this volume is part of.
scan_drbd_volume_number numeric not null, -- The volume number (the 'vnr' in the DRBD config).
scan_drbd_volume_device_path text not null, -- This is the device path to the DRBD resource
scan_drbd_volume_device_minor numeric not null, -- This is the device minor number, which translates to '/dev/drbd<minor>'
scan_drbd_volume_size numeric not null, -- This is the size of the DRBD device (in bytes)
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_drbd_volume_scan_drbd_resource_uuid) REFERENCES scan_drbd_resources(scan_drbd_resource_uuid),
FOREIGN KEY(scan_drbd_volume_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE scan_drbd_volumes OWNER TO admin;
-- History copy of 'scan_drbd_volumes'. It has the same columns as the public table (without the 
-- constraints), plus 'history_id' to give each recorded version its own sequence number.
CREATE TABLE history.scan_drbd_volumes (
history_id bigserial,
scan_drbd_volume_uuid uuid,
scan_drbd_volume_host_uuid uuid,
scan_drbd_volume_scan_drbd_resource_uuid uuid,
scan_drbd_volume_number numeric,
scan_drbd_volume_device_path text,
scan_drbd_volume_device_minor numeric,
scan_drbd_volume_size numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.scan_drbd_volumes OWNER TO admin;
-- Trigger function; re-reads the 'scan_drbd_volumes' row that was just inserted or updated (matched on 
-- 'scan_drbd_volume_uuid') and copies it into 'history.scan_drbd_volumes'.
CREATE FUNCTION history_scan_drbd_volumes() RETURNS trigger
AS $$
DECLARE
history_scan_drbd_volumes RECORD;
BEGIN
SELECT INTO history_scan_drbd_volumes * FROM scan_drbd_volumes WHERE scan_drbd_volume_uuid=new.scan_drbd_volume_uuid;
INSERT INTO history.scan_drbd_volumes
(scan_drbd_volume_uuid,
scan_drbd_volume_host_uuid,
scan_drbd_volume_scan_drbd_resource_uuid,
scan_drbd_volume_number,
scan_drbd_volume_device_path,
scan_drbd_volume_device_minor,
scan_drbd_volume_size,
modified_date)
VALUES
(history_scan_drbd_volumes.scan_drbd_volume_uuid,
history_scan_drbd_volumes.scan_drbd_volume_host_uuid,
history_scan_drbd_volumes.scan_drbd_volume_scan_drbd_resource_uuid,
history_scan_drbd_volumes.scan_drbd_volume_number,
history_scan_drbd_volumes.scan_drbd_volume_device_path,
history_scan_drbd_volumes.scan_drbd_volume_device_minor,
history_scan_drbd_volumes.scan_drbd_volume_size,
history_scan_drbd_volumes.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_drbd_volumes() OWNER TO admin;
-- Record a history entry after every insert into or update of 'scan_drbd_volumes'.
CREATE TRIGGER trigger_scan_drbd_volumes
AFTER INSERT OR UPDATE ON scan_drbd_volumes
FOR EACH ROW EXECUTE PROCEDURE history_scan_drbd_volumes();
-- This is the peer information for a given volume. Each record describes the state of one volume (see 
-- 'scan_drbd_volumes') relative to one peer host.
CREATE TABLE scan_drbd_peers (
scan_drbd_peer_uuid uuid not null primary key,
scan_drbd_peer_host_uuid uuid not null, -- The host (hosts -> host_uuid) this record belongs to.
scan_drbd_peer_scan_drbd_volume_uuid uuid not null, -- The volume (scan_drbd_volumes -> scan_drbd_volume_uuid) this peer record is for.
scan_drbd_peer_peer_host_name text not null, -- The host name for this peer, as recorded in the config
scan_drbd_peer_connection_state text not null, -- The connection state to the peer. See "Connection States" and "Replication States" above.
scan_drbd_peer_local_disk_state text not null, -- The local disk state, as seen on the connection to this peer. See "Disk States" above.
scan_drbd_peer_peer_disk_state text not null, -- The peer's disk state, see "Disk States" above.
scan_drbd_peer_local_role text not null, -- The current local role, see "Resource Roles" above.
scan_drbd_peer_peer_role text not null, -- The current role of the peer, see "Resource Roles" above.
scan_drbd_peer_out_of_sync_size numeric not null, -- This is the number of "out of sync" bytes. Set to '0' when both sides are UpToDate.
scan_drbd_peer_replication_speed numeric not null, -- This is how many bytes per second are being copied. Set to '0' when not synchronizing.
scan_drbd_peer_estimated_time_to_sync numeric not null, -- This is the number of seconds that are *estimated* to remain in the resync. Set to '0' when both sides are UpToDate.
scan_drbd_peer_peer_ip_address text not null, -- The (SN) IP address used for this peer.
scan_drbd_peer_tcp_port numeric not null, -- This is the port number used for this peer.
scan_drbd_peer_protocol text not null, -- This is 'A' for async peers (to DR, usually) or 'C' to sync peers (node peer and sometimes DR)
scan_drbd_peer_fencing text not null, -- Set to 'resource-and-stonith' for node peers and 'dont-care' for DR hosts.
modified_date timestamp with time zone not null,
-- The peer record hangs off of a volume, so the key must point at 'scan_drbd_volumes', not at the resource table.
FOREIGN KEY(scan_drbd_peer_scan_drbd_volume_uuid) REFERENCES scan_drbd_volumes(scan_drbd_volume_uuid),
FOREIGN KEY(scan_drbd_peer_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE scan_drbd_peers OWNER TO admin;
-- History copy of 'scan_drbd_peers'. It has the same columns as the public table (without the constraints), 
-- plus 'history_id' to give each recorded version its own sequence number.
CREATE TABLE history.scan_drbd_peers (
history_id bigserial,
scan_drbd_peer_uuid uuid,
scan_drbd_peer_host_uuid uuid,
scan_drbd_peer_scan_drbd_volume_uuid uuid,
scan_drbd_peer_peer_host_name text,
scan_drbd_peer_connection_state text,
scan_drbd_peer_local_disk_state text,
scan_drbd_peer_peer_disk_state text,
scan_drbd_peer_local_role text,
scan_drbd_peer_peer_role text,
scan_drbd_peer_out_of_sync_size numeric,
scan_drbd_peer_replication_speed numeric,
scan_drbd_peer_estimated_time_to_sync numeric,
scan_drbd_peer_peer_ip_address text,
scan_drbd_peer_tcp_port numeric,
scan_drbd_peer_protocol text,
scan_drbd_peer_fencing text,
modified_date timestamp with time zone not null
);
ALTER TABLE history.scan_drbd_peers OWNER TO admin;
-- Trigger function; re-reads the 'scan_drbd_peers' row that was just inserted or updated (matched on 
-- 'scan_drbd_peer_uuid') and copies it into 'history.scan_drbd_peers'.
CREATE FUNCTION history_scan_drbd_peers() RETURNS trigger
AS $$
DECLARE
history_scan_drbd_peers RECORD;
BEGIN
SELECT INTO history_scan_drbd_peers * FROM scan_drbd_peers WHERE scan_drbd_peer_uuid=new.scan_drbd_peer_uuid;
INSERT INTO history.scan_drbd_peers
(scan_drbd_peer_uuid,
scan_drbd_peer_host_uuid,
scan_drbd_peer_scan_drbd_volume_uuid,
scan_drbd_peer_peer_host_name,
scan_drbd_peer_connection_state,
scan_drbd_peer_local_disk_state,
scan_drbd_peer_peer_disk_state,
scan_drbd_peer_local_role,
scan_drbd_peer_peer_role,
scan_drbd_peer_out_of_sync_size,
scan_drbd_peer_replication_speed,
scan_drbd_peer_estimated_time_to_sync,
scan_drbd_peer_peer_ip_address,
scan_drbd_peer_tcp_port,
scan_drbd_peer_protocol,
scan_drbd_peer_fencing,
modified_date)
VALUES
(history_scan_drbd_peers.scan_drbd_peer_uuid,
history_scan_drbd_peers.scan_drbd_peer_host_uuid,
history_scan_drbd_peers.scan_drbd_peer_scan_drbd_volume_uuid,
history_scan_drbd_peers.scan_drbd_peer_peer_host_name,
history_scan_drbd_peers.scan_drbd_peer_connection_state,
history_scan_drbd_peers.scan_drbd_peer_local_disk_state,
history_scan_drbd_peers.scan_drbd_peer_peer_disk_state,
history_scan_drbd_peers.scan_drbd_peer_local_role,
history_scan_drbd_peers.scan_drbd_peer_peer_role,
history_scan_drbd_peers.scan_drbd_peer_out_of_sync_size,
history_scan_drbd_peers.scan_drbd_peer_replication_speed,
history_scan_drbd_peers.scan_drbd_peer_estimated_time_to_sync,
history_scan_drbd_peers.scan_drbd_peer_peer_ip_address,
history_scan_drbd_peers.scan_drbd_peer_tcp_port,
history_scan_drbd_peers.scan_drbd_peer_protocol,
history_scan_drbd_peers.scan_drbd_peer_fencing,
history_scan_drbd_peers.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_drbd_peers() OWNER TO admin;
-- Record a history entry after every insert into or update of 'scan_drbd_peers'.
CREATE TRIGGER trigger_scan_drbd_peers
AFTER INSERT OR UPDATE ON scan_drbd_peers
FOR EACH ROW EXECUTE PROCEDURE history_scan_drbd_peers();

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Company: Alteeve's Niche, Inc.
License: GPL v2+
Author: Madison Kelly <mkelly@alteeve.ca>
NOTE: All string keys MUST be prefixed with the agent name! ie: 'scan_server_log_0001'.
-->
<words>
<meta version="3.0.0" languages="en_CA,jp"/>
<!-- Canadian English -->
<language name="en_CA" long_name="Canadian English" description="ScanCore scan agent that monitors DRBD resources, volumes and peer connections.">
<!-- Alert entries -->
<key name="scan_drbd_alert_0001"></key>
<!-- Error entries -->
<key name="scan_drbd_error_0001">DRBD is not configured on this host, exiting.</key>
<key name="scan_drbd_error_0002">The call to 'drbdadm dump-xml' returned the exit code: [#!variable!return_code!#].</key>
<key name="scan_drbd_error_0003">[ Warning ] - Failed to parse the DRBD XML. The XML read was:
========
#!variable!xml!#
========
The error was:
========
#!variable!error!#
========
</key>
<!-- Log entries -->
<key name="scan_drbd_log_0001">Starting The: [#!variable!program!#] DRBD resource agent.</key>
</language>
</words>

@ -1,37 +1,29 @@
#!/usr/bin/perl
#
use warnings;
use strict;
my $sysstat_directory = "/var/log/sa/";
my $hostname = `hostname | cut -f 1 -d .`;
opendir(my $directory_handle, $sysstat_directory) || die "Can't locate ".$sysstat_directory."\n";
my @file_list = grep { /^sa[0-9]./ } readdir($directory_handle);
printf "Hostname is ... ".$hostname."\n";
foreach my $filename (sort {$a cmp $b} @file_list)
use warnings;
use Anvil::Tools;
use Data::Dumper;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
#printf "Filepath: ....".$sysstat_directory.$filepath."\n"
my $shell_call = "sadf -dht ".$sysstat_directory.$filename." -- -S -u -r -p -q -n DEV";
printf "Shell Call - ... ".$shell_call."\n";
open(my $file_handle, "$shell_call 2>&1 |") || die "Failed to parse output of [".$shell_call."].\n";
while (<$file_handle>)
{
chomp;
my $csv_line = $_;
if ($csv_line =~ /$hostname/)
{
#printf "CSV Line... ".$csv_line."\n";
printf "Variable Match!\n";
}
if ($csv_line =~ 'thinkpad-06HCV0')
{
printf "String Match!\n";
}
}
}
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
my $anvil = Anvil::Tools->new();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches;
# Connect to the database(s).
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"});
$anvil->nice_exit({exit_code => 0});

Loading…
Cancel
Save