diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 8e39faae..b43980d5 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -15522,7 +15522,9 @@ sub write =head2 _age_out_data -This deletes any data considered transient (power, thermal, etc) after C<< scancore::database::age_out >> hours old. +This deletes any data considered transient (power, thermal, etc) after C<< scancore::database::age_out >> hours old. The exception is completed jobs that are more than 2 hours old, which are purged. + +B<< Note >>: Scan agents can have fast-growing tables purged as well. This is done by setting the appropriate values in the C<< $to_clean >> hash contained within. This is hard coded so the source needs to be updated as the number of agents grows. =cut sub _age_out_data @@ -15533,6 +15535,10 @@ sub _age_out_data my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->_age_out_data()" }}); + # Log our start, as this takes some time to run. + my $start_time = time; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0623"}); + # Get the timestamp to delete jobs and processed alert records older than 2h my $query = "SELECT now() - '2h'::interval"; my $old_timestamp = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; @@ -15717,72 +15723,140 @@ sub _age_out_data } ### Looks for scan agent data that grows quickly. 
- # scan-ipmitool - $query = "SELECT COUNT(*) FROM pg_catalog.pg_tables WHERE tablename='scan_ipmitool_values' AND schemaname='public';"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + # We don't use 'anvil->data' to prevent injecting SQL queries in anvil.conf + my $to_clean = {}; - my $count = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { count => $count }}); - if ($count) + # scan_apc_pdu + $to_clean->{table}{scan_apc_pdus}{child_table}{scan_apc_pdu_phases}{uuid_column} = "scan_apc_pdu_phase_uuid"; + $to_clean->{table}{scan_apc_pdus}{child_table}{scan_apc_pdu_variables}{uuid_column} = "scan_apc_pdu_variable_uuid"; + + # scan_apc_ups + $to_clean->{table}{scan_apc_upses}{child_table}{scan_apc_ups_batteries}{uuid_column} = "scan_apc_ups_battery_uuid"; + $to_clean->{table}{scan_apc_upses}{child_table}{scan_apc_ups_input}{uuid_column} = "scan_apc_ups_input_uuid"; + $to_clean->{table}{scan_apc_upses}{child_table}{scan_apc_ups_output}{uuid_column} = "scan_apc_ups_output_uuid"; + + # scan_filesystems + $to_clean->{table}{scan_filesystems}{child_table}{scan_filesystems}{uuid_column} = "scan_filesystem_uuid"; + + # scan_hardware + $to_clean->{table}{scan_hardware}{child_table}{scan_hardware}{uuid_column} = "scan_hardware_uuid"; + $to_clean->{table}{scan_hardware}{child_table}{scan_hardware}{uuid_column} = "scan_hardware_uuid"; + + # scan_hpacucli + $to_clean->{table}{scan_hpacucli_variables}{child_table}{scan_hpacucli_variables}{uuid_column} = "scan_hpacucli_variable_uuid"; + + # scan_ipmitool + $to_clean->{table}{scan_ipmitool}{child_table}{scan_ipmitool_values}{uuid_column} = "scan_ipmitool_value_uuid"; + + # scan_storcli + $to_clean->{table}{scan_storcli_variables}{child_table}{scan_storcli_variables}{uuid_column} = "scan_storcli_variable_uuid"; + + my $vacuum = 0; + 
foreach my $table (sort {$a cmp $b} keys %{$to_clean->{table}}) { - foreach my $uuid (keys %{$anvil->data->{cache}{database_handle}}) - { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uuid => $uuid }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { table => $table }}); - my $queries = []; - my $query = "SELECT scan_ipmitool_value_uuid FROM scan_ipmitool_values;"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); - - my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); - my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - results => $results, - count => $count, - }}); - foreach my $row (@{$results}) + # Does the table exist? + $query = "SELECT COUNT(*) FROM pg_catalog.pg_tables WHERE tablename='scan_apc_pdus' AND schemaname='public';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $count = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { count => $count }}); + if ($count) + { + # The table exists, clean up child tables. + foreach my $uuid (keys %{$anvil->data->{cache}{database_handle}}) { - my $column_uuid = $row->[0]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { column_uuid => $column_uuid }}); - - # Find how many records will be left. If it's 0, we'll use an OFFSET 1. - my $query = "SELECT history_id FROM history.scan_ipmitool_values WHERE scan_ipmitool_value_uuid = ".$anvil->Database->quote($column_uuid)." 
AND modified_date > '".$old_timestamp."';"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { uuid => $uuid }}); - my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); - my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - results => $results, - count => $count, - }}); - if ($count) + foreach my $child_table (sort {$a cmp $b} keys %{$to_clean->{table}{$table}{child_table}}) { - # At least one record will be left. - my $query = "DELETE FROM history.scan_ipmitool_values WHERE scan_ipmitool_value_uuid = ".$anvil->Database->quote($column_uuid)." AND modified_date <= '".$old_timestamp."';"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); - push @{$queries}, $query; - } - else - { - # This would delete everything, reserve at least one record. + my $uuid_column = $to_clean->{table}{$table}{child_table}{$child_table}{uuid_column}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + child_table => $child_table, + uuid_column => $uuid_column, + }}); + + # Get a list of all records. + my $queries = []; + my $query = "SELECT ".$uuid_column." 
FROM ".$child_table.";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + results => $results, + count => $count, + }}); foreach my $row (@{$results}) { - my $history_id = $row->[0]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { history_id => $history_id }}); + my $column_uuid = $row->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { column_uuid => $column_uuid }}); - my $query = "DELETE FROM history.scan_ipmitool_values WHERE scan_ipmitool_value_uuid = ".$anvil->Database->quote($column_uuid)." AND hostory_id = '".$history_id."';"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); - push @{$queries}, $query; + # Find out if there are any records to remove at all. + my $query = "SELECT history_id FROM history.".$child_table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." AND modified_date <= '".$old_timestamp."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + results => $results, + count => $count, + }}); + + if ($count) + { + # Find how many records will be left. If it's 0, we'll use an OFFSET 1. + my $query = "SELECT history_id FROM history.".$child_table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." 
AND modified_date > '".$old_timestamp."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + + my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + results => $results, + count => $count, + }}); + if ($count) + { + # At least one record will be left, we can do a simple delete. + my $query = "DELETE FROM history.".$child_table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." AND modified_date <= '".$old_timestamp."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + push @{$queries}, $query; + } + else + { + # This would delete everything, reserve at least one record. + foreach my $row (@{$results}) + { + my $history_id = $row->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { history_id => $history_id }}); + + my $query = "DELETE FROM history.".$child_table." WHERE ".$uuid_column." = ".$anvil->Database->quote($column_uuid)." AND history_id = '".$history_id."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + push @{$queries}, $query; + } + } + } + } + + my $commits = @{$queries}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { commits => $commits }}); + if ($commits) + { + # Commit the DELETEs. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0622", variables => { + age => $age, + table => $child_table, + database => $anvil->Get->host_name_from_uuid({host_uuid => $uuid, debug => $debug}), + }}); + $anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__}); + + $vacuum += $commits; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { vacuum => $vacuum }}); + undef $queries; } } } - - my $commits = @{$queries}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { commits => $commits }}); - if ($commits) - { - # Commit the DELETEs. - $anvil->Database->write({debug => $debug, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__}); - } } } @@ -15796,6 +15870,9 @@ sub _age_out_data $anvil->Database->write({debug => $debug, uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); } + my $runtime = time - $start_time; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0624", variables => { runtime => $runtime }}); + return(0); } @@ -16133,7 +16210,7 @@ sub _find_column $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { table => $table, count => $count, }}); diff --git a/Anvil/Tools/Job.pm b/Anvil/Tools/Job.pm index eacc7728..a237daac 100644 --- a/Anvil/Tools/Job.pm +++ b/Anvil/Tools/Job.pm @@ -616,7 +616,7 @@ sub update_progress $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { message => $message, picked_up_by => $picked_up_by }}); 
if ($message eq "clear") { - $picked_up_by = 0; + $picked_up_by = $$; $clear_status = 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { picked_up_by => $picked_up_by, diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index b2a9f514..321ffda5 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -4933,7 +4933,7 @@ sub update_hosts # Matches, we don't need to deal with this name. delete $anvil->data->{hosts}{needed}{$name}; } - else + elsif ($ip_address ne "127.0.0.1") { # The IP has changed. Skip this name (which removes it from the list). $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0481", variables => { diff --git a/anvil.conf b/anvil.conf index ca94138b..543cc918 100644 --- a/anvil.conf +++ b/anvil.conf @@ -76,9 +76,9 @@ feature::scancore::disable::preventative-live-migration = 0 # NOTE: If the archive directory doesn't exist, Anvil! will create it # automatically the first time it is needed. sys::database::archive::compress = 1 -sys::database::archive::trigger = 50000 -sys::database::archive::count = 25000 -sys::database::archive::division = 30000 +sys::database::archive::trigger = 100000 +sys::database::archive::count = 50000 +sys::database::archive::division = 75000 sys::database::archive::directory = /usr/local/anvil/archives/ # This puts a limit on how many queries (writes, generally) to make in a single batch transaction. This is diff --git a/share/words.xml b/share/words.xml index b8c1ec19..a25eae8b 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1787,6 +1787,9 @@ The file: [#!variable!file!#] needs to be updated. The difference is: The host: [#!variable!host_name!#] has shut down for thermal reasons: [#!variable!count!#] times. To prevent a frequent boot / thermal excursion / shutdown loop, we will wait: [#!variable!wait_for!#] before marking it's temperature as being OK again. This host has been running for: [#!variable!uptime!#]. 
The cluster will not be started (uptime must be less than 10 minutes for 'anvil-safe-start' to be called automatically). - The Scan agent: [#!variable!agent_name!#] ran a bit long, exiting after: [#!variable!runtime!#] seconds with the return code: [#!variable!return_code!#]. + Aging out one or more records that are more than: [#!variable!age!#] hours old from the table: [#!variable!table!#] on the database host: [#!variable!database!#]. + Starting the process of aging out old data. This can take about a minute, please be patient. + Aging out old data completed after: [#!variable!runtime!#] seconds. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index d4cb9059..13ec902d 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -462,6 +462,9 @@ sub handle_periodic_tasks ### recently enough. if ($type eq "striker") { + # Age out old data. This takes up to a minute. + $anvil->Database->_age_out_data(); + # Record a job, don't call it directly. It takes too long to run. my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ file => $THIS_FILE, diff --git a/tools/striker-get-peer-data b/tools/striker-get-peer-data index 0708ce7d..52ed6b97 100755 --- a/tools/striker-get-peer-data +++ b/tools/striker-get-peer-data @@ -315,7 +315,7 @@ sub get_password } # We'll pick up the peer's password from the database. - $anvil->Database->connect(); + $anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { diff --git a/tools/striker-initialize-host b/tools/striker-initialize-host index e2eda4a7..6c66a4f3 100755 --- a/tools/striker-initialize-host +++ b/tools/striker-initialize-host @@ -43,7 +43,7 @@ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list }}); # Connect to the database(s). 
-$anvil->Database->connect; +$anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { diff --git a/tools/striker-manage-install-target b/tools/striker-manage-install-target index 9c17477c..5cb454f8 100755 --- a/tools/striker-manage-install-target +++ b/tools/striker-manage-install-target @@ -123,7 +123,7 @@ if ($anvil->data->{switches}{status}) } # Connect to the database(s). -$anvil->Database->connect; +$anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { diff --git a/tools/striker-manage-peers b/tools/striker-manage-peers index e36ee3bd..24df5a01 100755 --- a/tools/striker-manage-peers +++ b/tools/striker-manage-peers @@ -63,7 +63,7 @@ if (($< != 0) && ($> != 0)) } # We'll try to connect in case we're adding additional peers. -$anvil->Database->connect(); +$anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); # Am I adding, editing or deleting? @@ -543,6 +543,7 @@ sub process_entry # Connect, and configure, if needed. $anvil->Database->connect({ debug => 3, + check_for_resync => 1, check_if_configured => $host_uuid eq $anvil->Get->host_uuid ? 
1 : 0, }); @@ -623,7 +624,7 @@ sub process_entry sleep 1; - $anvil->Database->connect({db_uuid => $host_uuid}); + $anvil->Database->connect({check_for_resync => 1, db_uuid => $host_uuid}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); } } diff --git a/tools/striker-purge-target b/tools/striker-purge-target index 709099fd..e964d419 100755 --- a/tools/striker-purge-target +++ b/tools/striker-purge-target @@ -20,7 +20,7 @@ if (($running_directory =~ /^\./) && ($ENV{PWD})) my $anvil = Anvil::Tools->new(); -$anvil->Database->connect({debug => 3}); +$anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { diff --git a/tools/striker-scan-network b/tools/striker-scan-network index 41739957..9daff95b 100755 --- a/tools/striker-scan-network +++ b/tools/striker-scan-network @@ -40,7 +40,7 @@ if (($< != 0) && ($> != 0)) $anvil->nice_exit({exit_code => 5}); } -$anvil->Database->connect; +$anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) {