diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm
index 2335d59b..c82b803f 100644
--- a/Anvil/Tools.pm
+++ b/Anvil/Tools.pm
@@ -1009,6 +1009,16 @@ sub _set_defaults
html => "alteeve",
},
};
+ $anvil->data->{feature} = {
+ scancore => {
+ disable => {
+ 'preventative-live-migration' => 0,
+ },
+ threshold => {
+ 'preventative-live-migration' => 2,
+ },
+ },
+ };
return(0);
}
diff --git a/Anvil/Tools/Alert.pm b/Anvil/Tools/Alert.pm
index dc1d322f..bd27b126 100644
--- a/Anvil/Tools/Alert.pm
+++ b/Anvil/Tools/Alert.pm
@@ -276,7 +276,7 @@ sub check_condition_age
my $clear = defined $parameter->{clear} ? $parameter->{clear} : 0;
my $name = defined $parameter->{name} ? $parameter->{name} : "";
- my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : "NULL";
+ my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
clear => $clear,
name => $name,
diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm
index ca529f15..2467c7b4 100644
--- a/Anvil/Tools/Database.pm
+++ b/Anvil/Tools/Database.pm
@@ -1625,7 +1625,7 @@ sub connect
# Read the DB identifier and then check that we've not already connected to this DB.
my $query = "SELECT system_identifier FROM pg_control_system();";
- my $identifier = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
+ my $identifier = $anvil->Database->query({debug => $debug, uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
query => $query,
identifier => $identifier,
@@ -1691,7 +1691,7 @@ sub connect
variable_name => "database::".$uuid."::active",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { active_value => $active_value }});
- if (not $active_value)
+ if ($active_value eq "0")
{
# If we're "retry", we just started up.
if (($retry) && ($is_local))
diff --git a/Anvil/Tools/ScanCore.pm b/Anvil/Tools/ScanCore.pm
index da503dd4..7e6cf3bf 100644
--- a/Anvil/Tools/ScanCore.pm
+++ b/Anvil/Tools/ScanCore.pm
@@ -2022,13 +2022,25 @@ sub post_scan_analysis_node
# Last, evaluate health if we're otherwise OK
if ($peer_health > $local_health)
{
+ # The user may have set a migration threshold.
+ my $difference = $peer_health - $local_health;
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { difference => $difference }});
+
+ if (not $anvil->data->{feature}{scancore}{threshold}{'preventative-live-migration'})
+ {
+ $anvil->data->{feature}{scancore}{threshold}{'preventative-live-migration'} = 2;
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+ 'feature::scancore::threshold::preventative-live-migration' => $anvil->data->{feature}{scancore}{threshold}{'preventative-live-migration'},
+ }});
+ }
+
# A user may disable health-based preventative live migrations.
if ($anvil->data->{feature}{scancore}{disable}{'preventative-live-migration'})
{
# Do nothing.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "message_0239"});
}
- else
+ elsif ($difference >= $anvil->data->{feature}{scancore}{threshold}{'preventative-live-migration'})
{
# How long has this been the case?
my $age = $anvil->Alert->check_condition_age({
diff --git a/scancore-agents/scan-apc-pdu/scan-apc-pdu b/scancore-agents/scan-apc-pdu/scan-apc-pdu
index 940f75be..0dcfb2d3 100755
--- a/scancore-agents/scan-apc-pdu/scan-apc-pdu
+++ b/scancore-agents/scan-apc-pdu/scan-apc-pdu
@@ -260,7 +260,7 @@ FROM
my $scan_apc_pdu_link_speed = $row->[10];
my $scan_apc_pdu_phase_count = $row->[11];
my $scan_apc_pdu_outlet_count = $row->[12];
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
scan_apc_pdu_uuid => $scan_apc_pdu_uuid,
scan_apc_pdu_fence_uuid => $scan_apc_pdu_fence_uuid,
scan_apc_pdu_serial_number => $scan_apc_pdu_serial_number,
@@ -1385,6 +1385,7 @@ WHERE
}
# Delete this from the SQL hash so we know it didn't vanish.
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0726", variables => { hash_key => "sql::scan_apc_pdu_uuid::${scan_apc_pdu_uuid}" }});
delete $anvil->data->{sql}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid};
}
else
@@ -1596,10 +1597,19 @@ INSERT INTO
scan_apc_pdu_ipv4_address => $scan_apc_pdu_ipv4_address,
}});
+
if ($scan_apc_pdu_model_number ne "DELETED")
{
- # Yup! send an alert.
- my $query = "
+ # The PDUs only allow one connection at a time, so if another scan agent is
+ # connected, the PDU can appear to be missing when it isn't. As such, check how long
+ # it's been missing, and alert only if it's been missing for more than 10 minutes.
+ my $age = $anvil->Alert->check_condition_age({name => "scan_apc_pdu::lost_pdu::".$scan_apc_pdu_serial_number});
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { age => $age }});
+
+ if ($age > 600)
+ {
+ # Yup! send an alert.
+ my $query = "
UPDATE
scan_apc_pdus
SET
@@ -1608,22 +1618,23 @@ SET
WHERE
scan_apc_pdu_uuid = ".$anvil->Database->quote($scan_apc_pdu_uuid)."
;";
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
- push @{$anvil->data->{sys}{queries}}, $query;
-
- my $variables = {
- model => $scan_apc_pdu_model_number,
- serial_numer => $scan_apc_pdu_serial_number,
- ip_address => $scan_apc_pdu_ipv4_address,
- };
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0040", variables => $variables});
- $anvil->Alert->register({
- alert_level => "warning",
- message => "scan_apc_pdu_message_0040",
- variables => $variables,
- set_by => $THIS_FILE,
- sort_position => $anvil->data->{'scan-apc-pdu'}{alert_sort}++,
- });
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
+ push @{$anvil->data->{sys}{queries}}, $query;
+
+ my $variables = {
+ model => $scan_apc_pdu_model_number,
+ serial_numer => $scan_apc_pdu_serial_number,
+ ip_address => $scan_apc_pdu_ipv4_address,
+ };
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0040", variables => $variables});
+ $anvil->Alert->register({
+ alert_level => "warning",
+ message => "scan_apc_pdu_message_0040",
+ variables => $variables,
+ set_by => $THIS_FILE,
+ sort_position => $anvil->data->{'scan-apc-pdu'}{alert_sort}++,
+ });
+ }
}
}
@@ -2003,6 +2014,9 @@ sub gather_pdu_data
# If I got the serial number, I found the PDU.
next if not $scan_apc_pdu_serial_number;
+
+ # In case this PDU disappeared before, this will clear that condition.
+ $anvil->Alert->check_condition_age({clear => 1, name => "scan_apc_pdu::lost_pdu::".$scan_apc_pdu_serial_number});
#############################################################################################
# Base PDU info #
diff --git a/share/words.xml b/share/words.xml
index e934f2e0..e36b0d84 100644
--- a/share/words.xml
+++ b/share/words.xml
@@ -466,7 +466,7 @@ Giving up.
Failed to find the server: [#!variable!server!#] by name or UUID? Exiting.
The protocol: [#!variable!protocol!#] is invalid. Please use '--help' for more information.
The DR host: [#!variable!host_name!#] doesn't appear to be storage group: [#!variable!storage_group!#]. Unable to proceed.
- We need: [#!variable!space_needed!# (#!variables!space_needed_bytes!# Bytes)] from the storage group: [#!variable!storage_group!#], but only: [#!variables!space_on_dr!# (#!variable!space_on_dr_bytes!# bytes)] is available on DR. Unable to proceed.
+ We need: [#!variable!space_needed!# (#!variable!space_needed_bytes!# Bytes)] from the storage group: [#!variable!storage_group!#], but only: [#!variable!space_on_dr!# (#!variable!space_on_dr_bytes!# bytes)] is available on DR. Unable to proceed.
[ Error ] - The check appears to have failed. Expected a return code of '0', but got: [#!variable!return_code!#]
The output, if any, was
====
@@ -520,7 +520,7 @@ The definition data passed in was:
====
]]>
[ Error ] - Failed to wipe and delete the logical volume: [#!variable!local_lv!#] that was volume number: [#!variable!volume!#] under the server: [#!variable!server!#].
- There was a problem deleting: [#!variables!config_file!#]. The rest of the process completed successfully. Please manually remove this file if it still exists.
+ There was a problem deleting: [#!variable!config_file!#]. The rest of the process completed successfully. Please manually remove this file if it still exists.
@@ -576,7 +576,13 @@ sys::privacy::strong = #!data!sys::privacy::strong!#
# Normally, if one node in the Anvil! is healthier than the other, it will pull the servers from the peer
# on to it. This is a process called "preventative live migration". If you would like to disable this
# feature, set this to '1'.
-feature::scancore::disable::preventative-live-migration = 0
+#feature::scancore::disable::preventative-live-migration = 0
+
+# If "preventative live migration" is enabled, this sets the threshold to trigger migration. The difference
+# in health score has to be equal to or greater than the number below. The health scores are usually set to
+# '1' per event, though scan agents are free to assign higher scores per event. The default threshold is
+# '2'. To migrate on any health difference, set this to '1'. Use whole numbers only.
+#feature::scancore::threshold::preventative-live-migration = 2
### Database
# Database connections;
@@ -2191,8 +2197,9 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
The server: [#!variable!server!#] is ready to boot.
The server: [#!variable!server!#] was found to be running already, but it wasn't marked as booted. Marking it as if it just booted to handle any dependent servers.
The server: [#!variable!server!#] is configured to stay off, ignoring it.
- The file: [#!variable!file!#] needs to be added to the database, but since the last scan it's size grew from: [#!variable!old_size_bytes!# (#!variables!old_size_hr!#)] to: [#!variable!new_size_bytes!# (#!variables!new_size_hr!#)]. A difference of: [#!variable!difference_bytes!# (#!variables!difference_hr!#)]. It might still be being uploaded, so we'll keep checking periodocally until the size stops changing.
+ The file: [#!variable!file!#] needs to be added to the database, but since the last scan its size grew from: [#!variable!old_size_bytes!# (#!variable!old_size_hr!#)] to: [#!variable!new_size_bytes!# (#!variable!new_size_hr!#)]. A difference of: [#!variable!difference_bytes!# (#!variable!difference_hr!#)]. It might still be being uploaded, so we'll keep checking periodically until the size stops changing.
Found the missing file: [#!variable!file!#] in the directory: [#!variable!directory!#]. Updating the database now.
+ Deleting the hash key: [#!variable!hash_key!#].
The host name: [#!variable!target!#] does not resolve to an IP address.
diff --git a/tools/anvil-manage-files b/tools/anvil-manage-files
index d07cb7be..76f2b6ee 100755
--- a/tools/anvil-manage-files
+++ b/tools/anvil-manage-files
@@ -28,7 +28,8 @@
# 6 = The file to delete is not under '/mnt/shared/'.
#
# TODO:
-# -
+# - If two Strikers have the same file name, but different sizes, we get into a yo-yo of updating the two
+# sides. If this happens, we need to rsync the larger one over the smaller one.
#
# NOTE:
# - remove unsyncs, add syncs.