diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index f627a5fc..a258e609 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -15691,6 +15691,16 @@ sub resync_databases return(0); } + # If we're not a striker, don't resync ever. + my $host_type = $anvil->Get->host_type(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_type => $host_type }}); + if ($host_type ne "striker") + { + # Not a dashboard, don't resync + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0686"}); + return(1); + } + # If we're hosting servers, don't resync. Too high of a risk of oom-killer being triggered. my $server_count = $anvil->Server->count_servers({debug => $debug}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_count => $server_count }}); @@ -15700,6 +15710,9 @@ sub resync_databases return(0); } + # Before resync, age out the data in each DB + $anvil->Database->_age_out_data({debug => $debug}); + ### NOTE: Don't sort this array, we need to resync in the order that the user passed the tables to us ### to avoid trouble with primary/foreign keys. # We're going to use the array of tables assembles by _find_behind_databases() stored in diff --git a/Anvil/Tools/ScanCore.pm b/Anvil/Tools/ScanCore.pm index 7e2d6600..1fe867b4 100644 --- a/Anvil/Tools/ScanCore.pm +++ b/Anvil/Tools/ScanCore.pm @@ -199,7 +199,7 @@ sub agent_startup if (($anvil->data->{scancore}{$agent}{disable}) && (not $anvil->data->{switches}{force})) { # Exit. 
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, key => "log_0646", variables => { program => $THIS_FILE }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, key => "log_0646", variables => { program => $agent }}); $anvil->nice_exit({exit_code => 0}); } diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index e49e9db4..f4198021 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -654,6 +654,159 @@ sub check_memory }
+=head2 check_ram_use
+
+This is meant to be used by daemons to check how much RAM they are using. It returns a two-element list; the first value is C<< 0 >> if the in-use RAM is at or below the maximum, and C<< 1 >> if the in-use RAM is too high. The second value is the amount of RAM in use, in bytes. If the program is not found to be running, C<< 2, 0 >> is returned.
+
+ my ($problem, $used_ram) = $anvil->System->check_ram_use({
+ 	program => $THIS_FILE,
+ 	max_ram => 1073741824,
+ });
+
+Parameters;
+
+=head3 program (required)
+
+This is generally C<< $THIS_FILE >>. Though this could be used to check the RAM use of other programs.
+
+=head3 max_ram (optional, default '1073741824' (1 GiB))
+
+This is the limit allowed, in bytes. If the in-use RAM is greater than this amount, the first return value is set to C<< 1 >>. This method does not register an alert itself; acting on the result is left to the caller.
+
+=cut
+sub check_ram_use
+{
+	my $self      = shift;
+	my $parameter = shift;
+	my $anvil     = $self->parent;
+	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
+	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "System->check_ram_use()" }});
+
+	my $program = defined $parameter->{program} ? $parameter->{program} : "";
+	my $max_ram = defined $parameter->{max_ram} ? $parameter->{max_ram} : 1073741824;
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		program => $program,
+		max_ram => $max_ram,
+	}});
+
+	# Return values; 'problem' is the status (0 == OK, 1 == over the limit) and 'ram_used' is in bytes.
+	my $problem  = 0;
+	my $ram_used = 0;
+
+	# See if we're a daemon running under systemctl. If so, the memory reported includes all spawned
+	# child programs, swap, etc. Much more thorough.
+	my $shell_call = $anvil->data->{path}{exe}{systemctl}." status ".$program." --lines=0";
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }});
+
+	my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		output      => $output,
+		return_code => $return_code,
+	}});
+	foreach my $line (split/\n/, $output)
+	{
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }});
+		# Take only the first token; systemctl can append details like '(limit: 1.0G)' or '(peak: ...)' after the size.
+		if ($line =~ /Memory:\s+(\S+)/)
+		{
+			my $memory   = $1;
+			my $in_bytes = $anvil->Convert->human_readable_to_bytes({size => $memory, base2 => 1});
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+				memory   => $memory,
+				in_bytes => $anvil->Convert->add_commas({number => $in_bytes})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $in_bytes}).")",
+			}});
+			if ($in_bytes =~ /^\d+$/)
+			{
+				$ram_used = $in_bytes;
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+					ram_used => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
+				}});
+			}
+			last;
+		}
+	}
+
+	# If we didn't get the RAM from systemctl, add up the 'Private_Dirty' entries in each PID's smaps.
+	if (not $ram_used)
+	{
+		my $pids = $anvil->System->pids({debug => $debug, program_name => $program});
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pids => $pids }});
+
+		my $pids_found = @{$pids};
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { pids_found => $pids_found }});
+
+		if (not $pids_found)
+		{
+			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0135", variables => { program => $program }});
+			return(2, 0);
+		}
+
+		# Read in the smaps for each pid
+		foreach my $pid (sort {$a cmp $b} @{$pids})
+		{
+			my $smaps_path = "/proc/".$pid."/smaps";
+			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { smaps_path => $smaps_path }});
+
+			# This will store the amount of RAM used by this specific PID.
+			$anvil->data->{memory}{pid}{$pid} = 0;
+
+			if (not -e $smaps_path)
+			{
+				# It is possible that the program just closed.
+				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0433", variables => { pid => $pid }});
+				next;
+			}
+
+			# Read in the file.
+			my $body = $anvil->Storage->read_file({debug => $debug, file => $smaps_path});
+			foreach my $line (split/\n/, $body)
+			{
+				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }});
+				if ($line =~ /^Private_Dirty:\s+(\d+) (.*B)$/)
+				{
+					my $size = $1;
+					my $type = $2;
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+						type => $type,
+						size => $size,
+					}});
+					next if not $size;
+
+					# This uses 'kB' for 'KiB' >_> (the unit is renamed to its 'ib' form before conversion)
+					$type =  lc($type);
+					$type =~ s/b$/ib/ if $type !~ /ib$/;
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { type => $type }});
+
+					my $size_in_bytes = $anvil->Convert->human_readable_to_bytes({size => $size, type => $type, base2 => 1});
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+						size_in_bytes => $anvil->Convert->add_commas({number => $size_in_bytes})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $size_in_bytes}).")",
+					}});
+
+					$anvil->data->{memory}{pid}{$pid} += $size_in_bytes;
+					$ram_used                         += $size_in_bytes;
+					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+						"memory::pid::${pid}" => $anvil->Convert->add_commas({number => $anvil->data->{memory}{pid}{$pid}})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{memory}{pid}{$pid}}).")",
+						ram_used              => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
+					}});
+				}
+			}
+		}
+	}
+
+	# Are we using too much RAM?
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+		max_ram  => $anvil->Convert->add_commas({number => $max_ram})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $max_ram}).")",
+		ram_used => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
+	}});
+	if ($ram_used > $max_ram)
+	{
+		$problem = 1;
+		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
+	}
+
+	return($problem, $ram_used);
+}
+
 =head2 check_ssh_keys This method does several things; diff --git a/share/words.xml b/share/words.xml index e7c122e3..be3625b5 100644 --- a/share/words.xml +++ b/share/words.xml @@ -502,6 +502,7 @@ The output, if any, was; Failed to load the database file: [#!variable!file!#]. Deleting it so it's not considered in the next load attempt. Failed to read the kernel release on the host: [#!variable!target!#]. The return code was: [#!variable!return_code!#] (expected '0') and the release output, if any, was: [#!variable!output!#]. + The program: [#!variable!program!#] is using: [#!variable!ram_used!#] (#!variable!ram_used_bytes!# Bytes). This is probably caused by a memory leak, so we will now exit so that systemctl can restart us. If this is happening repeatedly, please contact support. @@ -2077,6 +2078,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Enabling 'ping' for all users. The network interface: [#!variable!nic!#] on the host: [#!variable!host!#] is recorded in the 'history.network_interfaces' table, but has not corresponding entry in the public table. Removing it. [ Note ] - The network bridge: [#!variable!name!#] with 'bridge_uuid': [#!variable!uuid!#] is a duplicate, removing it from the database(s). + Skipping resync, not a Striker dashboard. The host name: [#!variable!target!#] does not resolve to an IP address. @@ -3107,6 +3109,7 @@ We will sleep a bit and try again. [ Warning ] - Failed to build or install the DRBD kernel module! It is very unlikely that this machine will be able to run any servers until this is fixed. [ Warning ] - Table: [history.#!variable!table!#] not found. [ Warning ] - Holding off starting the cluster. 
Tested access to ourself, and failed. Is '/etc/hosts' populated? Will try again in ten seconds. + [ Warning ] - The program: [#!variable!program!#] was not found to be running. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index c514611e..c163a8a4 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -246,6 +246,9 @@ while(1) $anvil->nice_exit({exit_code => 0}); } + # Check how much RAM we're using. + check_ram($anvil); + # Disconnect from the database(s) and sleep now. $anvil->Database->disconnect(); sleep(2); @@ -258,6 +261,41 @@ $anvil->nice_exit({exit_code => 0}); # Functions # #############################################################################################################
+# If we're using too much ram, send an alert and exit. Takes the Anvil::Tools handle and returns 0 otherwise.
+sub check_ram
+{
+	my ($anvil) = @_;
+
+	# Problem 0 == ok, 1 == too much ram used, 2 == no pid found (nothing was measured, so that is not a leak)
+	my ($problem, $ram_used) = $anvil->System->check_ram_use({program => $THIS_FILE});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		problem  => $problem,
+		ram_used => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
+	}});
+	if ($problem == 1)
+	{
+		# Send an alert and exit.
+		$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
+			program        => $THIS_FILE,
+			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
+			ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
+		}, set_by => $THIS_FILE, sort_position => 0});
+		$anvil->Email->send_alerts();
+
+		# Log the same
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0357", variables => {
+			program        => $THIS_FILE,
+			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
+			ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
+		}});
+
+		# Exit with RC0 so that systemctl restarts (NOTE(review): confirm the unit's 'Restart=' restarts on a clean exit)
+		$anvil->nice_exit({exit_code => 0});
+	}
+
+	return(0);
+}
+
 # Check to see if we're mapping the network on this host. sub check_if_mapping { @@ -1291,7 +1329,7 @@ sub prep_database { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { prep_database => $prep_database, - "sys}{database}{connections" => $anvil->data->{sys}{database}{connections}, + "sys::database::connections" => $anvil->data->{sys}{database}{connections}, }}); if ($prep_database) { diff --git a/tools/scancore b/tools/scancore index 9b131e53..8a27054f 100755 --- a/tools/scancore +++ b/tools/scancore @@ -163,6 +163,9 @@ while(1) # Clean up cleanup_after_run($anvil); + # Check how much RAM we're using. + check_ram($anvil); + # Sleep until it's time to run again. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0249", variables => { run_interval => $run_interval, @@ -181,6 +184,41 @@ $anvil->nice_exit({exit_code => 0}); # Functions # #############################################################################################################
+# If we're using too much ram, send an alert and exit. Takes the Anvil::Tools handle and returns 0 otherwise.
+sub check_ram
+{
+	my ($anvil) = @_;
+
+	# Problem 0 == ok, 1 == too much ram used, 2 == no pid found (nothing was measured, so that is not a leak)
+	my ($problem, $ram_used) = $anvil->System->check_ram_use({program => $THIS_FILE});
+	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
+		problem  => $problem,
+		ram_used => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
+	}});
+	if ($problem == 1)
+	{
+		# Send an alert and exit.
+		$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
+			program        => $THIS_FILE,
+			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
+			ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
+		}, set_by => $THIS_FILE, sort_position => 0});
+		$anvil->Email->send_alerts();
+
+		# Log the same
+		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0357", variables => {
+			program        => $THIS_FILE,
+			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
+			ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
+		}});
+
+		# Exit with RC0 so that systemctl restarts (NOTE(review): confirm the unit's 'Restart=' restarts on a clean exit)
+		$anvil->nice_exit({exit_code => 0});
+	}
+
+	return(0);
+}
+
 # This cleans things up after a scan run has completed. sub cleanup_after_run {