* Biggest change in this commit: scan-apc-pdu and scan-apc-ups now only run on Striker dashboards! This is because we found that if two machines ran their agents at the same time, the response time from SNMP read requests grew a lot. This meant it was likely that a third, fourth, and further machines would then start their own scan agent runs while the existing runs were still trying to process, causing the SNMP reads to get slower still until timeouts popped.

* Bumped scancore's scan delay from 30 seconds to 60.
* Shortened the age-out time to 24 hours and again boosted the archive thresholds. As we get a feel for the amount of data collected on multi-Anvil! systems over time, we may continue to tune this.
* Moved Database->archive_database() to be called daily by anvil-daemon, instead of during '->connect' calls.
* Added locking to Database->_age_out_data to avoid resyncs mid-purge. Also moved the power, temperature and ip_addresses tables into the same 'to_clean' hash, as the separate purge loop for them was duplicate logic.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent 8807915bb7
commit 4dcd505753
  1. 2
      Anvil/Tools.pm
  2. 108
      Anvil/Tools/Database.pm
  3. 6
      Anvil/Tools/ScanCore.pm
  4. 6
      anvil.conf
  5. 8
      scancore-agents/scan-apc-pdu/scan-apc-pdu
  6. 14
      scancore-agents/scan-apc-ups/scan-apc-ups
  7. 2
      scancore-agents/scan-storcli/scan-storcli
  8. 2
      share/words.xml
  9. 7
      tools/anvil-daemon
  10. 2
      tools/scancore

@ -842,7 +842,7 @@ sub _set_defaults
database => { database => {
# This is the number of hours, after which, transient data (like temperature and # This is the number of hours, after which, transient data (like temperature and
# power data) is considered "old" and gets deleted from the database. # power data) is considered "old" and gets deleted from the database.
age_out => 48, age_out => 24,
}, },
}; };
$anvil->data->{sys} = { $anvil->data->{sys} = {

@ -186,7 +186,7 @@ sub archive_database
# If not given tables, use the system tables. # If not given tables, use the system tables.
if (not $tables) if (not $tables)
{ {
$tables = $anvil->data->{sys}{database}{check_tables}; $tables = $anvil->Database->get_tables_from_schema({debug => $debug, schema_file => "all"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { tables => $tables }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { tables => $tables }});
} }
@ -216,11 +216,13 @@ sub archive_database
return(1); return(1);
} }
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0451"});
# Make sure I have sane values. # Make sure I have sane values.
$anvil->data->{sys}{database}{archive}{compress} = 1 if not defined $anvil->data->{sys}{database}{archive}{compress}; $anvil->data->{sys}{database}{archive}{compress} = 1 if not defined $anvil->data->{sys}{database}{archive}{compress};
$anvil->data->{sys}{database}{archive}{count} = 25000 if not defined $anvil->data->{sys}{database}{archive}{count}; $anvil->data->{sys}{database}{archive}{count} = 100000 if not defined $anvil->data->{sys}{database}{archive}{count};
$anvil->data->{sys}{database}{archive}{division} = 30000 if not defined $anvil->data->{sys}{database}{archive}{division}; $anvil->data->{sys}{database}{archive}{division} = 125000 if not defined $anvil->data->{sys}{database}{archive}{division};
$anvil->data->{sys}{database}{archive}{trigger} = 50000 if not defined $anvil->data->{sys}{database}{archive}{trigger}; $anvil->data->{sys}{database}{archive}{trigger} = 500000 if not defined $anvil->data->{sys}{database}{archive}{trigger};
$anvil->data->{sys}{database}{archive}{save_to_disk} = 0; $anvil->data->{sys}{database}{archive}{save_to_disk} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"sys::database::archive::compress" => $anvil->data->{sys}{database}{archive}{compress}, "sys::database::archive::compress" => $anvil->data->{sys}{database}{archive}{compress},
@ -14743,10 +14745,6 @@ sub resync_databases
return(0); return(0);
} }
# Archive old data before resync'ing
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0451"});
$anvil->Database->archive_database({debug => $debug});
### NOTE: Don't sort this array, we need to resync in the order that the user passed the tables to us ### NOTE: Don't sort this array, we need to resync in the order that the user passed the tables to us
### to avoid trouble with primary/foreign keys. ### to avoid trouble with primary/foreign keys.
# We're going to use the array of tables assembles by _find_behind_databases() stored in # We're going to use the array of tables assembles by _find_behind_databases() stored in
@ -15535,6 +15533,9 @@ sub _age_out_data
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->_age_out_data()" }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->_age_out_data()" }});
# Get a lock.
$anvil->Database->locking({debug => $debug, request => 1});
# Log our start, as this takes some time to run. # Log our start, as this takes some time to run.
my $start_time = time; my $start_time = time;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0623"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0623"});
@ -15583,6 +15584,8 @@ sub _age_out_data
# Commit the DELETEs. # Commit the DELETEs.
$anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__}); $anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__});
} }
$anvil->Database->locking({debug => $debug, renew => 1});
} }
# Remove old processed alerts. # Remove old processed alerts.
@ -15622,6 +15625,7 @@ sub _age_out_data
# Commit the DELETEs. # Commit the DELETEs.
$anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__}); $anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__});
} }
$anvil->Database->locking({debug => $debug, renew => 1});
} }
# Now process power and tempoerature, if not disabled. # Now process power and tempoerature, if not disabled.
@ -15631,13 +15635,14 @@ sub _age_out_data
if ($age =~ /\D/) if ($age =~ /\D/)
{ {
# Age is not valid, set it to defaults. # Age is not valid, set it to defaults.
$age = 48; $age = 24;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { age => $age }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { age => $age }});
} }
if ($age == 0) if ($age == 0)
{ {
# Disabled, return. # Disabled, return.
$anvil->Database->locking({debug => $debug, release => 1});
return(0); return(0);
} }
@ -15649,83 +15654,15 @@ sub _age_out_data
old_timestamp => $old_timestamp, old_timestamp => $old_timestamp,
}}); }});
# Purge temperature and power data.
my $tables = {};
$tables->{temperature} = "temperature_uuid";
$tables->{power} = "power_uuid";
$tables->{ip_addresses} = "ip_address_uuid";
foreach my $table (sort {$a cmp $b} keys %{$tables})
{
my $uuid_column = $tables->{$table};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
table => $table,
uuid_column => $uuid_column,
}});
foreach my $uuid (keys %{$anvil->data->{cache}{database_handle}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { uuid => $uuid }});
my $queries = [];
my $query = "SELECT ".$uuid_column." FROM ".$table;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $column_uuid = $row->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { column_uuid => $column_uuid }});
# Find how many records will be left. If it's 0, we'll use an OFFSET 1.
my $query = "SELECT history_id FROM history.".$table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." AND modified_date > '".$old_timestamp."';";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
results => $results,
count => $count,
}});
if ($count)
{
# At least one record will be left.
my $query = "DELETE FROM history.".$table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." AND modified_date <= '".$old_timestamp."';";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
push @{$queries}, $query;
}
else
{
# This would delete everything, reserve at least one record.
foreach my $row (@{$results})
{
my $history_id = $row->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { history_id => $history_id }});
my $query = "DELETE FROM history.".$table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." AND hostory_id = '".$history_id."';";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
push @{$queries}, $query;
}
}
}
my $commits = @{$queries};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { commits => $commits }});
if ($commits)
{
# Commit the DELETEs.
$anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__});
}
}
}
### Looks for scan agent data that grows quickly. ### Looks for scan agent data that grows quickly.
# We don't use 'anvil->data' to prevent injecting SQL queries in anvil.conf # We don't use 'anvil->data' to prevent injecting SQL queries in anvil.conf
my $to_clean = {}; my $to_clean = {};
# Power, temperatures and ip addresses
$to_clean->{table}{temperature}{child_table}{temperature}{uuid_column} = "temperature_uuid";
$to_clean->{table}{power}{child_table}{power}{uuid_column} = "power_uuid";
$to_clean->{table}{ip_addresses}{child_table}{ip_addresses}{uuid_column} = "ip_address_uuid";
# scan_apc_pdu # scan_apc_pdu
$to_clean->{table}{scan_apc_pdus}{child_table}{scan_apc_pdu_phases}{uuid_column} = "scan_apc_pdu_phase_uuid"; $to_clean->{table}{scan_apc_pdus}{child_table}{scan_apc_pdu_phases}{uuid_column} = "scan_apc_pdu_phase_uuid";
$to_clean->{table}{scan_apc_pdus}{child_table}{scan_apc_pdu_variables}{uuid_column} = "scan_apc_pdu_variable_uuid"; $to_clean->{table}{scan_apc_pdus}{child_table}{scan_apc_pdu_variables}{uuid_column} = "scan_apc_pdu_variable_uuid";
@ -15855,6 +15792,7 @@ sub _age_out_data
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { vacuum => $vacuum }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { vacuum => $vacuum }});
undef $queries; undef $queries;
} }
$anvil->Database->locking({debug => $debug, renew => 1});
} }
} }
} }
@ -15868,11 +15806,15 @@ sub _age_out_data
my $query = "VACUUM FULL;"; my $query = "VACUUM FULL;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
$anvil->Database->write({debug => $debug, uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); $anvil->Database->write({debug => $debug, uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__});
$anvil->Database->locking({debug => $debug, renew => 1});
} }
my $runtime = time - $start_time; my $runtime = time - $start_time;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0624", variables => { runtime => $runtime }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0624", variables => { runtime => $runtime }});
$anvil->Database->locking({debug => $debug, release => 1});
return(0); return(0);
} }

@ -289,7 +289,7 @@ sub call_scan_agents
my $runtime = (time - $start_time); my $runtime = (time - $start_time);
my $log_level = $debug; my $log_level = $debug;
my $string_key = "log_0557"; my $string_key = "log_0557";
if ($runtime > 10) if ($runtime > 15)
{ {
$log_level = 1; $log_level = 1;
$string_key = "log_0621"; $string_key = "log_0621";
@ -2154,7 +2154,7 @@ LIMIT 1;";
} }
# Check this target's power state. # Check this target's power state.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0561", variables => { host_name => $host_name }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0561", variables => { host_name => $host_name }});
# Do we share a network with this system? # Do we share a network with this system?
$anvil->Network->load_ips({ $anvil->Network->load_ips({
@ -2206,7 +2206,7 @@ LIMIT 1;";
if ($access) if ($access)
{ {
# It's up. # It's up.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0562", variables => { host_name => $host_name }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0562", variables => { host_name => $host_name }});
$check_power = 0; $check_power = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {

@ -76,9 +76,9 @@ feature::scancore::disable::preventative-live-migration = 0
# NOTE: If the archive directory doesn't exist, Anvil! will create it # NOTE: If the archive directory doesn't exist, Anvil! will create it
# automatically the first time it is needed. # automatically the first time it is needed.
sys::database::archive::compress = 1 sys::database::archive::compress = 1
sys::database::archive::trigger = 100000 sys::database::archive::trigger = 500000
sys::database::archive::count = 50000 sys::database::archive::count = 100000
sys::database::archive::division = 75000 sys::database::archive::division = 125000
sys::database::archive::directory = /usr/local/anvil/archives/ sys::database::archive::directory = /usr/local/anvil/archives/
# This puts a limit on how many queries (writes, generally) to make in a single batch transaction. This is # This puts a limit on how many queries (writes, generally) to make in a single batch transaction. This is

@ -162,6 +162,14 @@ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "
# Read switches # Read switches
$anvil->Get->switches; $anvil->Get->switches;
# Too many connections cause the UPS to lag out, so we only run on Strikers.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type ne "striker") && (not $anvil->data->{switches}{force}))
{
$anvil->nice_exit({exit_code => 1});
}
# If we're disabled and '--force' wasn't used, exit. # If we're disabled and '--force' wasn't used, exit.
if (($anvil->data->{scancore}{'scan-apc-pdu'}{disable}) && (not $anvil->data->{switches}{force})) if (($anvil->data->{scancore}{'scan-apc-pdu'}{disable}) && (not $anvil->data->{switches}{force}))
{ {

@ -189,6 +189,14 @@ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "
# Read switches # Read switches
$anvil->Get->switches; $anvil->Get->switches;
# Too many connections cause the UPS to lag out, so we only run on Strikers.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type ne "striker") && (not $anvil->data->{switches}{force}))
{
$anvil->nice_exit({exit_code => 1});
}
# If we're disabled and '--force' wasn't used, exit. # If we're disabled and '--force' wasn't used, exit.
if (($anvil->data->{scancore}{'scan-apc-ups'}{disable}) && (not $anvil->data->{switches}{force})) if (($anvil->data->{scancore}{'scan-apc-ups'}{disable}) && (not $anvil->data->{switches}{force}))
{ {
@ -232,7 +240,11 @@ gather_ups_data($anvil);
find_changes($anvil); find_changes($anvil);
# Update the database # Update the database
$anvil->Database->insert_or_update_updated({updated_by => $THIS_FILE}); my $updated_uuid = $anvil->Database->insert_or_update_updated({
debug => 2,
updated_by => $THIS_FILE,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { updated_uuid => $updated_uuid }});
# Clean up and go away. # Clean up and go away.
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});

@ -6093,7 +6093,7 @@ AND
$message_key = "scan_storcli_warning_0006"; $message_key = "scan_storcli_warning_0006";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { message_key => $message_key }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { message_key => $message_key }});
} }
elsif ($old_variable_value > $new_variable_value) elsif ($new_variable_value > $old_variable_value)
{ {
# Rising # Rising
my $jumped = ($new_variable_value - $old_variable_value); my $jumped = ($new_variable_value - $old_variable_value);

@ -1596,7 +1596,7 @@ Failed to promote the DRBD resource: [#!variable!resource!#] primary. Expected a
<key name="log_0448">Ready to parse: [#!variable!file!#].</key> <key name="log_0448">Ready to parse: [#!variable!file!#].</key>
<key name="log_0449">Parsed: [#!variable!records!#], adding/updating them to the database now.</key> <key name="log_0449">Parsed: [#!variable!records!#], adding/updating them to the database now.</key>
<key name="log_0450">Skipping the network scan. The next scheduled scan will be done in: [#!variable!next_scan!#]. Override with '--force'.</key> <key name="log_0450">Skipping the network scan. The next scheduled scan will be done in: [#!variable!next_scan!#]. Override with '--force'.</key>
<key name="log_0451">Checking to see if any data needs to be archived before starting the resync.</key> <key name="log_0451">Checking to see if any data needs to be archived.</key>
<key name="log_0452">Skipping archiving, not a Striker dashboard.</key> <key name="log_0452">Skipping archiving, not a Striker dashboard.</key>
<key name="log_0453">Archiving: [#!variable!records!#] over: [#!variable!loops!#] segments from the table: [#!variable!table!#] from the database on: [#!variable!host!#]. This might take a bit, please be patient.</key> <key name="log_0453">Archiving: [#!variable!records!#] over: [#!variable!loops!#] segments from the table: [#!variable!table!#] from the database on: [#!variable!host!#]. This might take a bit, please be patient.</key>
<key name="log_0454">Writing: [#!variable!records!#] to the file: [#!variable!file!#].</key> <key name="log_0454">Writing: [#!variable!records!#] to the file: [#!variable!file!#].</key>

@ -172,7 +172,7 @@ $anvil->data->{timing}{daily_checks} = 86400;
$anvil->data->{timing}{repo_update_interval} = 86400; $anvil->data->{timing}{repo_update_interval} = 86400;
$anvil->data->{timing}{next_minute_check} = $now_time - 1; $anvil->data->{timing}{next_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1; $anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks}, "s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
"s2:timing::daily_checks" => $anvil->data->{timing}{daily_checks}, "s2:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
"s3:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval}, "s3:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval},
@ -337,7 +337,7 @@ sub set_delay
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { type => $type }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { type => $type }});
if ($type eq "striker") if ($type eq "striker")
{ {
foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{database}}) foreach my $uuid (keys %{$anvil->data->{database}})
{ {
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"sys::host_uuid" => $anvil->data->{sys}{host_uuid}, "sys::host_uuid" => $anvil->data->{sys}{host_uuid},
@ -465,6 +465,9 @@ sub handle_periodic_tasks
# Age out old data. This takes up to a minute. # Age out old data. This takes up to a minute.
$anvil->Database->_age_out_data(); $anvil->Database->_age_out_data();
# Archive old data
$anvil->Database->archive_database();
# Record a job, don't call it directly. It takes too long to run. # Record a job, don't call it directly. It takes too long to run.
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE, file => $THIS_FILE,

@ -106,7 +106,7 @@ while(1)
prepare_for_run($anvil); prepare_for_run($anvil);
# Set our sleep time # Set our sleep time
my $run_interval = 30; my $run_interval = 60;
if ((exists $anvil->data->{scancore}{timing}{run_interval}) && ($anvil->data->{scancore}{timing}{run_interval} =~ /^\d+$/)) if ((exists $anvil->data->{scancore}{timing}{run_interval}) && ($anvil->data->{scancore}{timing}{run_interval} =~ /^\d+$/))
{ {
$run_interval = $anvil->data->{scancore}{timing}{run_interval}; $run_interval = $anvil->data->{scancore}{timing}{run_interval};

Loading…
Cancel
Save