Local modifications to ClusterLabs/Anvil by Alteeve
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

677 lines
33 KiB

#!/usr/bin/perl
#
# This scans the cluster by processing the CIB and storing information about nodes, cluster states, etc.
#
# NOTE: The data stored here is not bound to a given host. As such, node 1 will run this agent and node 2
# won't, _except_ when node 1 is offline and only node 2 is up.
#
# Examples;
#
# Exit codes;
# 0 = Normal exit.
# 1 = Startup failure (not running as root, no DB, bad file read, etc)
# 2 = Not a cluster member
#
# TODO:
#
use strict;
use warnings;
use Anvil::Tools;
use Data::Dumper;
# Disable buffering
$| = 1;
# Prevent a discrepency between UID/GID and EUID/EGID from throwing an error.
$< = $>;
$( = $);
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
my $anvil = Anvil::Tools->new();
# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
# Not root
print $anvil->Words->string({key => "error_0005"})."\n";
$anvil->nice_exit({exit_code => 1});
}
$anvil->data->{scancore}{'scan-cluster'}{disable} = 0;
$anvil->data->{switches}{force} = 0;
$anvil->Storage->read_config();
# Read switches
$anvil->Get->switches;
# If we're disabled and '--force' wasn't used, exit.
if (($anvil->data->{scancore}{'scan-cluster'}{disable}) && (not $anvil->data->{switches}{force}))
{
# Exit.
$anvil->nice_exit({exit_code => 0});
}
# Handle start-up tasks
my $problem = $anvil->ScanCore->agent_startup({agent => $THIS_FILE});
if ($problem)
{
$anvil->nice_exit({exit_code => 1});
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
if ($anvil->data->{switches}{purge})
{
# This can be called when doing bulk-database purges.
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
my $schema_file = $anvil->data->{path}{directories}{scan_agents}."/".$THIS_FILE."/".$THIS_FILE.".sql";
$anvil->Database->purge_data({
debug => 2,
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
tables => $anvil->Database->get_tables_from_schema({schema_file => $schema_file}),
});
$anvil->nice_exit({exit_code => 0});
}
# Before we do anything, are we a node in a pacemaker cluster?
my $host_type = $anvil->Get->host_type;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
if ($host_type ne "node")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_cluster_log_0002", variables => { host_type => $host_type }});
$anvil->nice_exit({exit_code => 0});
}
# Read last scan
read_last_scan($anvil);
# Read and process in one shot.
collect_data($anvil);
# Find changes.
find_changes($anvil);
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :) * Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively. * Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf. * Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state. * Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested. * Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times. * Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan. * Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
# Check the cluster config.
check_config($anvil);
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :) * Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively. * Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf. * Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state. * Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested. * Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times. * Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan. * Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
sub check_config
{
my ($anvil) = @_;
$anvil->Database->get_manifests();
my $anvil_name = $anvil->Cluster->get_anvil_name({});
my $manifest_uuid = $anvil->data->{manifests}{manifest_name}{$anvil_name}{manifest_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
anvil_name => $anvil_name,
manifest_uuid => $manifest_uuid,
}});
if ($manifest_uuid)
{
# Check to see if the stonith config needs to be updated.
$anvil->Cluster->check_stonith_config({debug => 2});
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :) * Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively. * Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf. * Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state. * Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested. * Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times. * Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan. * Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
}
return(0);
}
# Looks for changes.
sub find_changes
{
my ($anvil) = @_;
# We can't track a cluster through name change, so either we're INSERTing a new one, or bust.
my $scan_cluster_anvil_uuid = $anvil->Cluster->get_anvil_uuid();
my $anvil_name = $anvil->Get->anvil_name_from_uuid({anvil_uuid => $scan_cluster_anvil_uuid});
my $scan_cluster_uuid = "";
my $cluster_name = $anvil->data->{cib}{parsed}{data}{cluster}{name};
my $stonith_enabled = $anvil->data->{cib}{parsed}{data}{stonith}{enabled};
my $stonith_max_attempts = $anvil->data->{cib}{parsed}{data}{stonith}{'max-attempts'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
cluster_name => $cluster_name,
stonith_enabled => $stonith_enabled,
stonith_max_attempts => $stonith_max_attempts,
}});
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
# If we're a full cluster member, read the CIB as well.
my $cluster_cib = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "cib::parsed::local::ready" => $anvil->data->{cib}{parsed}{'local'}{ready} }});
if ($anvil->data->{cib}{parsed}{'local'}{ready})
{
$cluster_cib = $anvil->Storage->read_file({cache => 0, force_read => 1, debug => 2, file => $anvil->data->{path}{configs}{'cib.xml'}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_cib => $cluster_cib }});
}
if (exists $anvil->data->{sql}{anvil_uuid}{$scan_cluster_anvil_uuid})
{
# Check for a name change
$scan_cluster_uuid = $anvil->data->{sql}{anvil_uuid}{$scan_cluster_anvil_uuid};
my $old_cluster_name = $anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_name};
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
my $old_cluster_cib = $anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_cib};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
scan_cluster_uuid => $scan_cluster_uuid,
old_cluster_name => $old_cluster_name,
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
old_cluster_cib => $old_cluster_cib,
}});
if ($cluster_name ne $old_cluster_name)
{
# The name of the cluster has changed.
my $query = "
UPDATE
scan_cluster
SET
scan_cluster_name = ".$anvil->Database->quote($cluster_name).",
modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})."
WHERE
scan_cluster_uuid = ".$anvil->Database->quote($scan_cluster_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
my $variables = {
new_cluster_name => $cluster_name,
old_cluster_name => $old_cluster_name,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0002", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0002", variables => $variables, set_by => $THIS_FILE});
}
if (($cluster_cib) && ($cluster_cib ne $old_cluster_cib))
{
my $query = "
UPDATE
scan_cluster
SET
scan_cluster_cib = ".$anvil->Database->quote($cluster_name).",
modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})."
WHERE
scan_cluster_uuid = ".$anvil->Database->quote($scan_cluster_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
my $variables = {
new_cluster_name => $cluster_name,
old_cluster_name => $old_cluster_name,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0002", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0002", variables => $variables, set_by => $THIS_FILE});
}
}
else
{
# New cluster, INSERT
$scan_cluster_uuid = $anvil->Get->uuid();
my $query = "
INSERT INTO
scan_cluster
(
scan_cluster_uuid,
scan_cluster_anvil_uuid,
scan_cluster_name,
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
scan_cluster_cib,
modified_date
) VALUES (
".$anvil->Database->quote($scan_cluster_uuid).",
".$anvil->Database->quote($scan_cluster_anvil_uuid).",
".$anvil->Database->quote($cluster_name).",
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
".$anvil->Database->quote($cluster_cib).",
".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})."
);";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
my $variables = { cluster_name => $cluster_name };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0001", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0001", variables => $variables, set_by => $THIS_FILE});
}
$anvil->Database->get_anvils();
foreach my $scan_cluster_node_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{node}})
{
my $scan_cluster_node_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $scan_cluster_node_name});
my $scan_cluster_node_pacemaker_id = $anvil->data->{cib}{parsed}{data}{node}{$scan_cluster_node_name}{node_state}{pacemaker_id};
my $scan_cluster_node_in_ccm = $anvil->data->{cib}{parsed}{data}{node}{$scan_cluster_node_name}{node_state}{in_ccm};
my $scan_cluster_node_crmd_member = $anvil->data->{cib}{parsed}{data}{node}{$scan_cluster_node_name}{node_state}{crmd};
my $scan_cluster_node_cluster_member = $anvil->data->{cib}{parsed}{data}{node}{$scan_cluster_node_name}{node_state}{'join'};
my $scan_cluster_node_maintenance_mode = $anvil->data->{cib}{parsed}{data}{node}{$scan_cluster_node_name}{node_state}{'maintenance-mode'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
scan_cluster_node_name => $scan_cluster_node_name,
scan_cluster_node_host_uuid => $scan_cluster_node_host_uuid,
scan_cluster_node_pacemaker_id => $scan_cluster_node_pacemaker_id,
scan_cluster_node_in_ccm => $scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member => $scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member => $scan_cluster_node_cluster_member,
scan_cluster_node_maintenance_mode => $scan_cluster_node_maintenance_mode,
}});
if (exists $anvil->data->{sql}{scan_cluster_node_host_uuid}{$scan_cluster_node_host_uuid})
{
# Look for changes.
my $scan_cluster_node_uuid = $anvil->data->{sql}{scan_cluster_node_host_uuid}{$scan_cluster_node_host_uuid};
my $old_scan_cluster_node_name = $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_name};
my $old_scan_cluster_node_pacemaker_id = $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_pacemaker_id};
my $old_scan_cluster_node_in_ccm = $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_in_ccm};
my $old_scan_cluster_node_crmd_member = $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_crmd_member};
my $old_scan_cluster_node_cluster_member = $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_cluster_member};
my $old_scan_cluster_node_maintenance_mode = $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_maintenance_mode};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
scan_cluster_node_uuid => $scan_cluster_node_uuid,
old_scan_cluster_node_name => $old_scan_cluster_node_name,
old_scan_cluster_node_pacemaker_id => $old_scan_cluster_node_pacemaker_id,
old_scan_cluster_node_in_ccm => $old_scan_cluster_node_in_ccm,
old_scan_cluster_node_crmd_member => $old_scan_cluster_node_crmd_member,
old_scan_cluster_node_cluster_member => $old_scan_cluster_node_cluster_member,
old_scan_cluster_node_maintenance_mode => $old_scan_cluster_node_maintenance_mode,
}});
my $update = 0;
if ($scan_cluster_node_name ne $old_scan_cluster_node_name)
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
my $variables = {
new_node_name => $scan_cluster_node_name,
old_node_name => $old_scan_cluster_node_name,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0008", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0009", variables => $variables, set_by => $THIS_FILE});
}
if ($scan_cluster_node_pacemaker_id ne $old_scan_cluster_node_pacemaker_id)
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
my $variables = {
node_name => $scan_cluster_node_name,
new_pacemaker_id => $scan_cluster_node_pacemaker_id ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
old_pacemaker_id => $old_scan_cluster_node_pacemaker_id ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0004", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0004", variables => $variables, set_by => $THIS_FILE});
}
if ($scan_cluster_node_in_ccm ne $old_scan_cluster_node_in_ccm)
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
my $variables = {
node_name => $scan_cluster_node_name,
new_in_ccm => $scan_cluster_node_in_ccm ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
old_in_ccm => $old_scan_cluster_node_in_ccm ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0005", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0005", variables => $variables, set_by => $THIS_FILE});
}
if ($scan_cluster_node_crmd_member ne $old_scan_cluster_node_crmd_member)
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
my $variables = {
node_name => $scan_cluster_node_name,
new_crmd_member => $scan_cluster_node_crmd_member ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
old_crmd_member => $old_scan_cluster_node_crmd_member ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0006", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0006", variables => $variables, set_by => $THIS_FILE});
}
if ($scan_cluster_node_cluster_member ne $old_scan_cluster_node_cluster_member)
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
my $variables = {
node_name => $scan_cluster_node_name,
new_cluster_member => $scan_cluster_node_cluster_member ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
old_cluster_member => $old_scan_cluster_node_cluster_member ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0007", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0007", variables => $variables, set_by => $THIS_FILE});
}
if ($scan_cluster_node_maintenance_mode ne $old_scan_cluster_node_maintenance_mode)
{
$update = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
my $variables = {
node_name => $scan_cluster_node_name,
new_maintenance_mode => $scan_cluster_node_maintenance_mode ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
old_maintenance_mode => $old_scan_cluster_node_maintenance_mode ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0008", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0008", variables => $variables, set_by => $THIS_FILE});
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { update => $update }});
if ($update)
{
my $query = "
UPDATE
scan_cluster_nodes
SET
scan_cluster_node_name = ".$anvil->Database->quote($scan_cluster_node_name).",
scan_cluster_node_pacemaker_id = ".$anvil->Database->quote($scan_cluster_node_pacemaker_id).",
scan_cluster_node_in_ccm = ".$anvil->Database->quote($scan_cluster_node_in_ccm).",
scan_cluster_node_crmd_member = ".$anvil->Database->quote($scan_cluster_node_crmd_member).",
scan_cluster_node_cluster_member = ".$anvil->Database->quote($scan_cluster_node_cluster_member).",
scan_cluster_node_maintenance_mode = ".$anvil->Database->quote($scan_cluster_node_maintenance_mode).",
modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})."
WHERE
scan_cluster_node_uuid = ".$anvil->Database->quote($scan_cluster_node_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
}
else
{
# Add the node.
my $scan_cluster_node_uuid = $anvil->Get->uuid();
my $query = "
INSERT INTO
scan_cluster_nodes
(
scan_cluster_node_uuid,
scan_cluster_node_scan_cluster_uuid,
scan_cluster_node_host_uuid,
scan_cluster_node_name,
scan_cluster_node_pacemaker_id,
scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member,
scan_cluster_node_maintenance_mode,
modified_date
) VALUES (
".$anvil->Database->quote($scan_cluster_node_uuid).",
".$anvil->Database->quote($scan_cluster_uuid).",
".$anvil->Database->quote($scan_cluster_node_host_uuid).",
".$anvil->Database->quote($scan_cluster_node_name).",
".$anvil->Database->quote($scan_cluster_node_pacemaker_id).",
".$anvil->Database->quote($scan_cluster_node_in_ccm).",
".$anvil->Database->quote($scan_cluster_node_crmd_member).",
".$anvil->Database->quote($scan_cluster_node_cluster_member).",
".$anvil->Database->quote($scan_cluster_node_maintenance_mode).",
".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})."
);";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
my $host_name = $anvil->Get->host_name_from_uuid({host_uuid => $scan_cluster_node_host_uuid});
my $variables = {
cluster_name => $cluster_name,
node_name => $scan_cluster_node_name,
host_uuid => $scan_cluster_node_host_uuid,
host_name => $host_name,
pacemaker_id => $scan_cluster_node_pacemaker_id,
in_ccm => $scan_cluster_node_in_ccm ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#", # Yes or No
crmd_member => $scan_cluster_node_crmd_member ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
cluster_member => $scan_cluster_node_cluster_member ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
maintenance_mode => $scan_cluster_node_maintenance_mode ? "#!string!scan_cluster_unit_0001!#" : "#!string!scan_cluster_unit_0002!#",
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0003", variables => $variables});
$anvil->Alert->register({debug => 2, alert_level => "notice", message => "scan_cluster_alert_0003", variables => $variables, set_by => $THIS_FILE});
}
}
### TODO: Check for / repair bad cluster config issues
# If we're still alive, we're either node 1, or we're node 2 and node 1 is not ready. If we're not ready,
if ($stonith_max_attempts ne "INFINITY")
{
### TODO: Call pcs to update
}
return(0);
}
# Read in existing data from the database.
sub read_last_scan
{
my ($anvil) = @_;
my $query = "
SELECT
scan_cluster_uuid,
scan_cluster_anvil_uuid,
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
scan_cluster_name,
scan_cluster_cib
FROM
scan_cluster
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
# NOTE: There's no known way to track a cluster name change, so we can't really avoid having
# an entery per cluster name.
my $scan_cluster_uuid = $row->[0];
my $scan_cluster_anvil_uuid = $row->[1];
my $scan_cluster_name = $row->[2];
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
my $scan_cluster_cib = $row->[3];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
"s1:scan_cluster_uuid" => $scan_cluster_uuid,
"s2:scan_cluster_anvil_uuid" => $scan_cluster_anvil_uuid,
"s3:scan_cluster_name" => $scan_cluster_name,
"s4:scan_cluster_cib" => $scan_cluster_cib,
}});
# Store the old data now.
$anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_name} = $scan_cluster_name;
$anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_anvil_uuid} = $scan_cluster_anvil_uuid;
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
$anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_cib} = $scan_cluster_cib;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sql::scan_cluster::scan_cluster_uuid::${scan_cluster_uuid}::scan_cluster_name" => $anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_name},
"sql::scan_cluster::scan_cluster_uuid::${scan_cluster_uuid}::scan_cluster_anvil_uuid" => $anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_anvil_uuid},
* THe get_cpu endpoint was completed. * The get_mmeory endpoint was completed. * The get_replicated_storage endpoint was completed, though it requires testing and likely has issues. To prepare for the get_status endpoint work, I needed to update ScanCore and modules to track the host_status. This commit contains the work needed for this. * Updated ScanCore->post_scan_analysis_striker() to use configured fence devices (except PDUs) to check if a target host is off or on, in there is no host_ipmi interface. In all cases, if a machine can be confirmed on or off, the host_status is now updated. * To support the above fence based power checks, updated scan-cluster to store the on-disk CIB in the new scan_cluster -> scan_cluster_cib colume. * Updated ScanCore->parse_cib() to map stonith primitive IDs to fence agents. Updated ->parse_crm_mon() to not call if the executable doesn't exist to avoid unhelpful error messages in the logs when called from a Striker. * Update DRBD->gather_data() to get the size data from /sys/block/drbd<minor>/size' x '/sys/block/drbd<minor>/queue/logical_block_size so it works when a device is Secondary (and can't be promoted). * Updated Database->get_hosts_info() to record the short host name as well as the stored host name. Created ->update_host_status() as a wrapper to ->insert_or_update_hosts() that only updates the host status. * Updated anvil-join-anvil to disabled ksm and ksmtuned daemons. * Updated scancore and anvil-daemon to set the host_status to 'online' on startup. Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
"sql::scan_cluster::scan_cluster_uuid::${scan_cluster_uuid}::scan_cluster_cib" => $anvil->data->{sql}{scan_cluster}{scan_cluster_uuid}{$scan_cluster_uuid}{scan_cluster_cib},
}});
# Make it easy to look up the cluster_uuid from the anvil_uuid.
$anvil->data->{sql}{anvil_uuid}{$scan_cluster_anvil_uuid} = $scan_cluster_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sql::anvil_uuid::${scan_cluster_anvil_uuid}" => $anvil->data->{sql}{anvil_uuid}{$scan_cluster_anvil_uuid},
}});
}
undef $count;
undef $results;
$query = "
SELECT
scan_cluster_node_uuid,
scan_cluster_node_scan_cluster_uuid,
scan_cluster_node_host_uuid,
scan_cluster_node_name,
scan_cluster_node_pacemaker_id,
scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member,
scan_cluster_node_maintenance_mode
FROM
scan_cluster_nodes
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
$count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
# We've got an entry in the 'scan_cluster_nodes' table, so now we'll look for data in the node and
# services tables.
my $scan_cluster_node_uuid = $row->[0];
my $scan_cluster_node_scan_cluster_uuid = $row->[1];
my $scan_cluster_node_host_uuid = $row->[2];
my $scan_cluster_node_name = $row->[3];
my $scan_cluster_node_pacemaker_id = $row->[4];
my $scan_cluster_node_in_ccm = $row->[5];
my $scan_cluster_node_crmd_member = $row->[6];
my $scan_cluster_node_cluster_member = $row->[7];
my $scan_cluster_node_maintenance_mode = $row->[8];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
scan_cluster_node_uuid => $scan_cluster_node_uuid,
scan_cluster_node_scan_cluster_uuid => $scan_cluster_node_scan_cluster_uuid,
scan_cluster_node_host_uuid => $scan_cluster_node_host_uuid,
scan_cluster_node_name => $scan_cluster_node_name,
scan_cluster_node_pacemaker_id => $scan_cluster_node_pacemaker_id,
scan_cluster_node_in_ccm => $scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member => $scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member => $scan_cluster_node_cluster_member,
scan_cluster_node_maintenance_mode => $scan_cluster_node_maintenance_mode,
}});
# Store the old data now.
$anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid} = {
scan_cluster_node_scan_cluster_uuid => $scan_cluster_node_scan_cluster_uuid,
scan_cluster_node_host_uuid => $scan_cluster_node_host_uuid,
scan_cluster_node_name => $scan_cluster_node_name,
scan_cluster_node_pacemaker_id => $scan_cluster_node_pacemaker_id,
scan_cluster_node_in_ccm => $scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member => $scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member => $scan_cluster_node_cluster_member,
scan_cluster_node_maintenance_mode => $scan_cluster_node_maintenance_mode,
};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_scan_cluster_uuid" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_scan_cluster_uuid},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_host_uuid" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_host_uuid},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_name" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_name},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_pacemaker_id" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_pacemaker_id},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_in_ccm" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_in_ccm},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_crmd_member" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_crmd_member},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_cluster_member" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_cluster_member},
"sql::scan_cluster_node::scan_cluster_node_uuid::${scan_cluster_node_uuid}::scan_cluster_node_maintenance_mode" => $anvil->data->{sql}{scan_cluster_node}{scan_cluster_node_uuid}{$scan_cluster_node_uuid}{scan_cluster_node_maintenance_mode},
}});
$anvil->data->{sql}{scan_cluster_node_host_uuid}{$scan_cluster_node_host_uuid} = $scan_cluster_node_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sql::scan_cluster_node_host_uuid::${scan_cluster_node_host_uuid}" => $anvil->data->{sql}{scan_cluster_node_host_uuid}{$scan_cluster_node_host_uuid},
}});
}
return(0);
}
# This reads in all the data we can find on the local system
sub collect_data
{
my ($anvil) = @_;
# Pick out core cluster details.
my $problem = $anvil->Cluster->parse_cib({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
# If there was a problem, we're not in the cluster.
if ($problem)
{
my $changed = $anvil->Alert->check_alert_sent({
record_locator => "scan_cluster::in_cluster",
set_by => $THIS_FILE,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changed => $changed }});
if ($changed)
{
# Register an alert.
my $variables = { host_name => $anvil->Get->host_name() };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0010", variables => $variables});
$anvil->Alert->register({alert_level => "warning", message => "scan_cluster_alert_0010", variables => $variables, set_by => $THIS_FILE});
# See if I need to mark us as out of the cluster. Normally, our peer would do this,
# but if we went down at the same time as our peer, both of us might not update the
# membership values.
my $query = "
SELECT
scan_cluster_node_uuid,
scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member
FROM
scan_cluster_nodes
WHERE
scan_cluster_node_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
# We've got an entry in the 'scan_cluster_nodes' table, so now we'll look for data in the node and
# services tables.
my $scan_cluster_node_uuid = $row->[0];
my $scan_cluster_node_in_ccm = $row->[1];
my $scan_cluster_node_crmd_member = $row->[2];
my $scan_cluster_node_cluster_member = $row->[3];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
scan_cluster_node_uuid => $scan_cluster_node_uuid,
scan_cluster_node_in_ccm => $scan_cluster_node_in_ccm,
scan_cluster_node_crmd_member => $scan_cluster_node_crmd_member,
scan_cluster_node_cluster_member => $scan_cluster_node_cluster_member,
}});
if (($scan_cluster_node_in_ccm) or ($scan_cluster_node_crmd_member) or ($scan_cluster_node_cluster_member))
{
# Update
my $query = "
UPDATE
scan_cluster_nodes
SET
scan_cluster_node_in_ccm = '0',
scan_cluster_node_crmd_member = '0',
scan_cluster_node_cluster_member = '0',
modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})."
WHERE
scan_cluster_node_uuid = ".$anvil->Database->quote($scan_cluster_node_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
$anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});
}
}
}
# Exit now.
$anvil->nice_exit({exit_code => 2});
}
else
{
# See if we came back into the cluster
my $changed = $anvil->Alert->check_alert_sent({
clear => 1,
record_locator => "scan_cluster::in_cluster",
set_by => $THIS_FILE,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changed => $changed }});
if ($changed)
{
# Register an alert.
my $variables = { host_name => $anvil->Get->host_name() };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_cluster_alert_0011", variables => $variables});
$anvil->Alert->register({alert_level => "warning", clear_alert => 1, message => "scan_cluster_alert_0011", variables => $variables, set_by => $THIS_FILE});
}
}
return(0);
}