#!/usr/bin/perl
# 
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster 
# nodes and DR hosts.
# 
# Exit codes;
# 0 = Normal exit or md5sum of this program changed and it exited to reload.
# 1 = Not running as root.
# 2 = Unable to connect to any database, even after trying to initialize the local system.
# 
# TODO: 
# - Need to check what kind of machine this is and not prep the database unless it's a dashboard.
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs
# - Decide if holding before the main loop until 'systemctl is-system-running' returns 'running' is a good
#   idea or not.
# - Write the status of this and the scancore daemon to /etc/anvil/anvil.motd and symlink it to /etc/motd.d/
# - Write a script that runs in crontab at UTC 17:00 that sends an email if Scancore or anvil-daemon are 
#   disabled.
# - Examine limits in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU=
# - Write a background program to scan the BCN and uses OUI data to try and find / auto-configure PDUs and 
#   UPSes
# - 
# - Increase DRBD's default timeout
# - Check for and enable persistent journald logging
# 
# NOTE:
# - For later; 'reboot --force --force' immediately kills the OS, like disabling ACPI on EL6 and hitting the 
#   power button. Might be useful in ScanCore down the road.
# 

use strict;
use warnings;
use Anvil::Tools;
use Proc::Simple;
#use Time::HiRes qw ( time sleep );
use JSON;
use HTML::Strip;
use HTML::FromText;
use Data::Dumper;
use Text::Diff;

# Work out our own file name and the directory we were started from.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

# Prevent a discrepancy between UID/GID and EUID/EGID from throwing an error.
$< = $>;
$( = $);

# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
#       in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# If, for some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();

# If dnf is running, hold.
$anvil->System->wait_on_dnf();

# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect({
	check_if_configured => 1, 
	check_for_resync    => 2, 
	debug               => 2, 
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});

# With no database available, a dashboard tries to set up its local database (and exits with code 2 if that
# still leaves us without a connection), while any other host type waits until a database shows up.
if (not $anvil->data->{sys}{database}{connections})
{
	if ($anvil->Get->host_type eq "striker")
	{
		# This is a dashboard, so try to configure the local database.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0201"});
		prep_database($anvil);
		
		# Try connecting again
		$anvil->Database->connect({check_if_configured => 1, check_for_resync => 2});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Still nothing, log and exit.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
			$anvil->nice_exit({exit_code => 2});
		}
	}
	else
	{
		# Not a dashboard; wait until we have a database, retrying every ten seconds.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0075"});
		until($anvil->data->{sys}{database}{connections})
		{
			sleep 10;
			
			check_network($anvil);
			$anvil->refresh();
			$anvil->Database->connect({check_if_configured => 1, check_for_resync => 2});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
			if (not $anvil->data->{sys}{database}{connections})
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0439"});
			}
		}
	}
}

# Read switches
$anvil->Get->switches({
	list => [qw(clear-mapping refresh-json run-once main-loop-only no-start startup-only)], 
	man  => $THIS_FILE, 
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# '--refresh-json' implies a single pass of the main loop with no start-up tasks and no job starts.
if ($anvil->data->{switches}{'refresh-json'})
{
	foreach my $implied_switch ("run-once", "main-loop-only", "no-start")
	{
		$anvil->data->{switches}{$implied_switch} = 1;
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"switches::run-once"       => $anvil->data->{switches}{'run-once'}, 
		"switches::main-loop-only" => $anvil->data->{switches}{'main-loop-only'}, 
		"switches::no-start"       => $anvil->data->{switches}{'no-start'}, 
	}});
}

# This is used to track initial checks / repairs of network issues.
$anvil->data->{sys}{network}{initial_checks} = 0;

# We use this to delay starting jobs for a short time.
our $start_time = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }});

# There are some things we only want to run on (re)start and don't need to always run.
if (not $anvil->data->{switches}{'main-loop-only'})
{
	run_once($anvil);
}

# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;

# What time is it, Mr. Fox?
my $now_time = time;

# To avoid multiple dashboards running a network scan and OUI parse, the dashboard peer with the lowest 
# host_uuid sets its daily checks to run now, and the other(s) will get a two hour delay.
my $delay = set_delay($anvil);

# Once a minute, we'll check the md5sums and see if we should restart. 
# Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
$anvil->data->{timing}{minute_checks}         = 60;
$anvil->data->{timing}{ten_minute_checks}     = 600;
$anvil->data->{timing}{daily_checks}          = 86400;
$anvil->data->{timing}{repo_update_interval}  = 86400;
$anvil->data->{timing}{next_minute_check}     = $now_time - 1;
$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_daily_check}      = ($now_time + $delay) - 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	"s1:timing::minute_checks"         => $anvil->data->{timing}{minute_checks}, 
	"s2:timing::ten_minute_checks"     => $anvil->data->{timing}{ten_minute_checks}, 
	"s3:timing::daily_checks"          => $anvil->data->{timing}{daily_checks}, 
	"s4:timing::repo_update_interval"  => $anvil->data->{timing}{repo_update_interval}, 
	"s5:now_time"                      => $now_time, 
	"s6:delay"                         => $delay, 
	"s7:timing::next_minute_check"     => $anvil->data->{timing}{next_minute_check}, 
	"s8:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, 
	"s9:timing::next_daily_check"      => $anvil->data->{timing}{next_daily_check}, 
}});

# Disconnect. We'll reconnect inside the loop
$anvil->Database->disconnect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0203"});

# This will prevent restarting while jobs are running.
$anvil->data->{sys}{jobs_running} = 0;

# When we periodically check if system files have changed, we'll also ask Database->connect() to check if it
# needs to be configured or updated. This is done periodically as it is expensive to run on every loop.
my $check_if_database_is_configured = 0;

# These are the things we always want running.
while(1)
{
	# Reload defaults, re-read the config and then connect to the database(s)
	$anvil->refresh();
	
	# If, for some reason, anvil.conf is lost, create it.
	$anvil->System->_check_anvil_conf();
	
	$anvil->Database->connect({check_if_configured => $check_if_database_is_configured, check_for_resync => 2});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
	
	# Mark that we don't want to check the database now.
	$check_if_database_is_configured = 0;
	
	if ($anvil->data->{sys}{database}{connections})
	{
		# Run the normal tasks
		keep_running($anvil);
		
		# Handle periodic tasks
		handle_periodic_tasks($anvil);
	}
	
	# Exit if 'run-once' selected.
	if ($anvil->data->{switches}{'run-once'})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0055"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Check how much RAM we're using.
	check_ram($anvil);
	
	# Disconnect from the database(s) and sleep now.
	$anvil->Database->disconnect();
	sleep(2);
}

$anvil->nice_exit({exit_code => 0});


#############################################################################################################
# Functions                                                                                                 #
#############################################################################################################

# If we're using too much ram, send an alert and exit. If a job is part way through (its progress is neither
# 0 nor 100), the exit is skipped for this pass and a warning is logged instead. Always returns 0 when it 
# returns at all.
sub check_ram
{
	my ($anvil) = @_;
	
	# Problem 0 == ok, 1 == too much ram used, 2 == no pid found
	my ($problem, $ram_used) = $anvil->System->check_ram_use({program => $THIS_FILE});
	
	# The byte count is reported both with commas and in human readable form.
	my $say_bytes = $anvil->Convert->add_commas({number => $ram_used});
	my $say_size  = $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		problem  => $problem, 
		ram_used => $say_bytes." (".$say_size.")", 
	}});
	
	# Nothing more to do if no problem was reported.
	return(0) if not $problem;
	
	# See if any jobs are running, and if so, hold because those jobs might be doing things (like OS 
	# updates or file syncs) that could make anvil-daemon appear to be using more memory.
	$anvil->Database->get_jobs({debug => 2});
	foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}})
	{
		my $job_command  = $anvil->data->{jobs}{running}{$job_uuid}{job_command};
		my $job_progress = $anvil->data->{jobs}{running}{$job_uuid}{job_progress};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			job_command  => $job_command, 
			job_progress => $job_progress, 
		}});
		
		# Jobs that haven't started or that have finished don't hold us up.
		next if (($job_progress == 100) || ($job_progress == 0));
		
		# Don't abort.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0139", variables => { 
			job_command    => $job_command, 
			job_progress   => $job_progress, 
			ram_used       => $say_size, 
			ram_used_bytes => $say_bytes, 
		}});
		return(0);
	}
	
	# Send an alert and exit.
	$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => { 
		program        => $THIS_FILE, 
		ram_used       => $say_size, 
		ram_used_bytes => $say_bytes, 
	}, set_by => $THIS_FILE, sort_position => 0});
	$anvil->Email->send_alerts();
	
	# Log the same
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0357", variables => { 
		program        => $THIS_FILE, 
		ram_used       => $say_size, 
		ram_used_bytes => $say_bytes, 
	}});
	
	# Exit with RC0 so that systemctl restarts
	$anvil->nice_exit({exit_code => 0});
	
	return(0);
}

# This decides if the local system will delay daily runs on start-up.
# Returns the number of seconds to hold off the first run of the daily checks; 0 on non-Striker hosts and on
# the Striker whose host UUID sorts first among the configured databases, 7200 on any other Striker.
sub set_delay
{
	my ($anvil) = @_;
	
	my $delay     = 7200;
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type eq "striker")
	{
		# Only the first (lowest sorted) database UUID is examined; the 'last' below is deliberate.
		foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				"sys::host_uuid" => $anvil->data->{sys}{host_uuid}, 
				uuid             => $uuid, 
			}});
			if ($uuid eq $anvil->data->{sys}{host_uuid})
			{
				$delay = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { delay => $delay }});
			}
			last;
		}
	}
	else
	{
		# Not a dashboard, don't delay
		$delay = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
	}
	
	return($delay);
}

# This checks to see if it's time to see if the network is ok and, if the system has been up long enough, 
# checks and tries to repair network issues.
# 
# It does three things, in order;
# 1. Once the uptime passes two minutes, it makes sure (one time only) that NetworkManager is running, and 
#    then calls check_firewall() on every pass.
# 2. It makes sure 'net.ipv4.ping_group_range' allows all users to ping.
# 3. If a database is connected and this host has no rows in 'network_interfaces', it runs scan-network.
# 
# Always returns 0.
sub check_network
{
	my ($anvil) = @_;
	
	### TODO: Remove this when EL8 support is dropped. This was an issue with the old ifcfg configured bonds
	# The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such,
	# we'll start watching the network after the uptime is 2 minutes.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
	if ($uptime > 120)
	{
		# Check that bonds are up. Degraded bonds will be left alone.
		if (not $anvil->data->{sys}{network}{initial_checks})
		{
			my $running = $anvil->System->check_daemon({daemon => "NetworkManager"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
			if (not $running)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }});
				my $return_code = $anvil->System->start_daemon({daemon => "NetworkManager"});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
			}
			
			# Record that the one-time check is done so it isn't repeated on later passes.
			$anvil->data->{sys}{network}{initial_checks} = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				"sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks}, 
			}});
		}
		
		check_firewall($anvil);
	}
	
	# Check that all users can ping.
	if (1)
	{
		my $shell_call = $anvil->data->{path}{exe}{sysctl}." net.ipv4.ping_group_range";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output }});
		
		# The two values are tab separated. The dots are escaped so only the real key name matches.
		if ($output =~ /net\.ipv4\.ping_group_range = (\d+)\t(\d+)$/)
		{
			my $lowest_uid  = $1;
			my $highest_uid = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				lowest_uid  => $lowest_uid, 
				highest_uid => $highest_uid, 
			}});
			
			if ($highest_uid < 2000)
			{
				# Tell the user we're enabling ping for all users.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0683"});
				
				my $shell_call = $anvil->data->{path}{exe}{sysctl}." -w net.ipv4.ping_group_range=\"0 2147483647\"";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { shell_call => $shell_call }});
				my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { output => $output }});
			}
		}
	}
	
	# Check that there's at least one entry in 'network_interfaces' and, if not, call scan-network. 
	# 
	# This function is also called from the start-up loop that waits for a database to become available,
	# so there may be nothing to query yet. In that case the check is skipped rather than quoting and 
	# dereferencing a query result with no connection behind it.
	# NOTE(review): What Database->query() returns without a connection isn't visible here - confirm.
	if ($anvil->data->{sys}{database}{connections})
	{
		my $query = "SELECT COUNT(*) FROM network_interfaces WHERE network_interface_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid).";";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
		my $count = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { count => $count }});
		
		if (not $count)
		{
			# Run scan-network
			my $shell_call = $anvil->data->{path}{directories}{scan_agents}."/scan-network/scan-network".$anvil->Log->switches;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output, 
				return_code => $return_code, 
			}});
		}
	}
	
	return(0);
}

# This handles running tasks that only run on some loops.
# Runs the once-a-minute, once-every-ten-minutes and once-a-day tasks when their next-run times (stored 
# under 'timing::next_*_check') have passed, then schedules each group's next run. 
# 
# - Minute tasks; network, install target, ssh key, hosts, md5sum (restart if changed), fence agent parsing,
#   shared file, mail config and stale 'db_in_use' checks, plus Striker-only duplicate / broken manifest 
#   clean-up and screenshot collection.
# - Ten minute tasks; on a Striker whose local database is in use, back up the database and rsync the dump
#   to the peers, then reap 'db_in_use' states older than six hours.
# - Daily tasks; disable ksm / ksmtuned / tuned, and on Strikers, age-out and archive the database and queue
#   the install target refresh, OUI parse and network scan jobs.
# 
# May exit the program (exit code 0) if the files on disk changed. Otherwise, always returns 0.
sub handle_periodic_tasks
{
	my ($anvil) = @_;
	
	my $now_time  = time;
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"s1:now_time"                      => $now_time, 
		"s2:timing::next_minute_check"     => $anvil->data->{timing}{next_minute_check}, 
		"s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, 
		"s4:timing::next_daily_check"      => $anvil->data->{timing}{next_daily_check}, 
		"s5:host_type"                     => $host_type, 
	}});
	
	# Time to run once per minute tasks.
	if ($now_time >= $anvil->data->{timing}{next_minute_check})
	{
		# Check the firewall needs to be updated.
		check_network($anvil);
		
		# Check to see if the PXE environment needs to be updated.
		check_install_target($anvil);
		
		# Check that the users we care about have ssh public keys and they're recorded in ssh_keys.
		$anvil->System->check_ssh_keys({debug => 2});
		
		$anvil->System->update_hosts({debug => 2});
		
		# Check if the files on disk have changed. Even if it is time to check, don't if a job is 
		# running. The flag lives under 'sys::jobs_running', which is what the main script sets up;
		# reading it from 'timing::' meant the guard could never hold a restart back.
		if ((not $anvil->data->{sys}{jobs_running}) && ($anvil->Storage->check_md5sums))
		{
			# NOTE: We exit with '0' to prevent systemctl from showing a scary red message.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"});
			$anvil->nice_exit({exit_code => 0});
		}
		
		# Mark that we want to check the database config next time.
		$check_if_database_is_configured = 1;
		
		# Update the next check time.
		$anvil->data->{timing}{next_minute_check} = $now_time + $anvil->data->{timing}{minute_checks};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
			"s1:timing::minute_checks"     => $anvil->data->{timing}{minute_checks}, 
			"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, 
		}});
		
		# Even when this runs, it should finish in under ten seconds so we don't need to background it.
		my $shell_call = $anvil->data->{path}{exe}{'anvil-parse-fence-agents'}.$anvil->Log->switches;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($parse_output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			parse_output => $parse_output, 
			return_code  => $return_code, 
		}});
		
		# Check shared files.
		check_files($anvil);
		
		# Check mail server config.
		my $problem = $anvil->Email->check_config({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
		
		# Check if anything is needed to be done in /mnt/shared.
		check_incoming($anvil);
		
		# Check for stale db_in_use states.
		check_db_in_use_states($anvil);
		
		# Do Striker-specific minute tasks
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
		if ($host_type eq "striker")
		{
			# Look for duplicates if we're the primary DB.
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				"sys::database::primary_db" => $anvil->data->{sys}{database}{primary_db}, 
				"Get->host_uuid"            => $anvil->Get->host_uuid, 
			}});
			if ($anvil->Get->host_uuid eq $anvil->data->{sys}{database}{primary_db})
			{
				$anvil->Database->_check_for_duplicates({debug => 2});
			}
			
			# Something is causing broken manifests to be created. Until found, this removes them.
			check_for_broken_manifests($anvil);
			
			# This can take a while, but it's been optimized to minimize how long it takes to 
			# run. To be safe, we'll still background it.
			my $shell_call = $anvil->data->{path}{exe}{'striker-get-screenshots'}.$anvil->Log->switches;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			my ($output, $return_code) = $anvil->System->call({
				background => 1, 
				shell_call => $shell_call, 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output, 
				return_code => $return_code, 
			}});
		}
	}
	
	# Now check to see if it's time to run less frequent tasks.
	if ($now_time >= $anvil->data->{timing}{next_ten_minute_check})
	{
		my $host_uuid = $anvil->Get->host_uuid();
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			host_type => $host_type, 
			host_uuid => $host_uuid, 
		}});
		
		# Are we a Striker and is there two or more connections? If so, evaluate if we should shut 
		# down our database.
		if ($host_type eq "striker")
		{
			# If we're the active database, dump our database out and rsync it to our peers.
			my $peers       = keys %{$anvil->data->{database}};
			my $connections = $anvil->data->{sys}{database}{connections};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				peers       => $peers, 
				connections => $connections, 
			}});
			if (exists $anvil->data->{cache}{database_handle}{$host_uuid})
			{
				# Verify that the database is up.
				my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
				if ($running)
				{
					# Backup our DB.
					my $dump_file = $anvil->Database->backup_database({debug => 2});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
					
					# Now rsync it to our peer(s)
					foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
					{
						next if $this_host_uuid eq $host_uuid;
						
						my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/";
						my $password    = $anvil->data->{database}{$this_host_uuid}{password};
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							this_host_uuid => $this_host_uuid, 
							destination    => $destination, 
							password       => $anvil->Log->is_secure($password), 
						}});
						
						# Time the copy so the log can report how long it took.
						my $start_time = time;
						my $failed     = $anvil->Storage->rsync({
							debug       => 3, 
							destination => $destination, 
							password    => $password, 
							source      => $dump_file, 
						});
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
						
						my $rsync_time  = time - $start_time;
						my $size        = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
						my $size_bytes  = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
						my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid});
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => { 
							file       => $dump_file, 
							host_name  => $target_name, 
							took       => $rsync_time, 
							size       => $size, 
							size_bytes => $size_bytes, 
						}});
					}
				}
			}
		}
		
		# Reap old db_in_use states over 6 hours old.
		my $query = "DELETE FROM states WHERE state_name LIKE 'db_in_use%' AND modified_date < (SELECT now() - interval '6 hour');\n";
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
		$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__});
		
		# Update the next check time.
		$anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			"s1:timing::ten_minute_checks"     => $anvil->data->{timing}{ten_minute_checks}, 
			"s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, 
		}});
	}
	
	# Now check to see if it's time to run daily tasks.
	if ($now_time >= $anvil->data->{timing}{next_daily_check})
	{
		# Make sure ksm, ksmtuned and tuned are disabled.
		foreach my $daemon ("ksm.service", "ksmtuned.service", "tuned.service")
		{
			my $status = $anvil->System->check_daemon({daemon => $daemon});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				daemon => $daemon, 
				status => $status, 
			}});
			if ($status eq "1")
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0145", variables => { daemon => $daemon }});
				$anvil->System->disable_daemon({
					now    => 1, 
					daemon => $daemon, 
				});
			}
		}
		
		### NOTE: We call it once/day, but this will also trigger on restart of anvil-daemon. As such, we
		###       don't use '--force' and let striker-manage-install-target skip the repo update if it happened
		###       recently enough.
		if ($host_type eq "striker")
		{
			### TODO: This is here only to handle the period of time where we disabled postgres 
			###       on boot. This should be removed sometime after 2022-08-01
			#$anvil->System->enable_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
			
			# Each task below records when it last ran in a variable. A missing or non-numeric 
			# value is treated as 100000 seconds ago, so the task runs.
			my $host_uuid                    = $anvil->Get->host_uuid();
			my ($last_age_out, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::aged-out"});
			my $time_since_last_age_out      = $last_age_out =~ /^\d+$/ ? time - $last_age_out : 100000;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:host_uuid'               => $host_uuid, 
				's2:last_age_out'            => $last_age_out, 
				's3:time_since_last_age_out' => $time_since_last_age_out, 
			}});
			
			# Run an age-out?
			if ($time_since_last_age_out > 86400)
			{
				# Age out old data. This takes up to a minute.
				my $variable_uuid = $anvil->Database->insert_or_update_variables({
					variable_name         => "database::".$host_uuid."::aged-out", 
					variable_value        => time, 
					variable_default      => "0", 
					variable_description  => "striker_0302", 
					variable_section      => "database", 
					variable_source_uuid  => "NULL", 
					variable_source_table => "", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
				$anvil->Database->_age_out_data();
			}
			
			# Run an archive?
			my ($last_archive, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::archived"});
			my $time_since_last_archive      = $last_archive =~ /^\d+$/ ? time - $last_archive : 100000;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:last_archive'            => $last_archive, 
				's2:time_since_last_archive' => $time_since_last_archive, 
			}});
			if ($time_since_last_archive > 86400)
			{
				# Archive old data
				my $variable_uuid = $anvil->Database->insert_or_update_variables({
					variable_name         => "database::".$host_uuid."::archived", 
					variable_value        => time, 
					variable_default      => "0", 
					variable_description  => "striker_0303", 
					variable_section      => "database", 
					variable_source_uuid  => "NULL", 
					variable_source_table => "", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
				$anvil->Database->archive_database();
			}
			
			# Run the install target update? Record a job, don't call it directly. It takes too 
			# long to run.
			my ($last_mit, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target"});
			my $time_since_last_mit      = $last_mit =~ /^\d+$/ ? time - $last_mit : 100000;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:last_mit'            => $last_mit, 
				's2:time_since_last_mit' => $time_since_last_mit, 
			}});
			if ($time_since_last_mit > 86400)
			{
				# Update the local install target data.
				my $variable_uuid = $anvil->Database->insert_or_update_variables({
					variable_name         => "jobs::last-ran::".$host_uuid."::manage-install-target", 
					variable_value        => time, 
					variable_default      => "0", 
					variable_description  => "striker_0304", 
					variable_section      => "jobs", 
					variable_source_uuid  => "NULL", 
					variable_source_table => "", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
				
				my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
					file            => $THIS_FILE, 
					line            => __LINE__, 
					job_command     => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches, 
					job_data        => "", 
					job_name        => "install-target::refresh", 
					job_title       => "job_0015", 
					job_description => "job_0017", 
					job_progress    => 0, 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { job_uuid => $job_uuid }});
			}
			
			# Update the OUI data?
			my ($last_parse_oui, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-parse-oui"});
			my $time_since_last_parse_oui      = $last_parse_oui =~ /^\d+$/ ? time - $last_parse_oui : 100000;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:last_parse_oui'            => $last_parse_oui, 
				's2:time_since_last_parse_oui' => $time_since_last_parse_oui, 
			}});
			if ($time_since_last_parse_oui > 86400)
			{
				# Yup.
				my $variable_uuid = $anvil->Database->insert_or_update_variables({
					variable_name         => "jobs::last-ran::striker-parse-oui", 
					variable_value        => time, 
					variable_default      => "0", 
					variable_description  => "striker_0305", 
					variable_section      => "jobs", 
					variable_source_uuid  => "NULL", 
					variable_source_table => "", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
				
				my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
					file            => $THIS_FILE, 
					line            => __LINE__, 
					job_command     => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches, 
					job_data        => "", 
					job_name        => "oui-data::refresh", 
					job_title       => "job_0064", 
					job_description => "job_0065", 
					job_progress    => 0, 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
			}
			
			# Scan the network?
			my ($last_network_scan, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-scan-network"});
			my $time_since_last_network_scan      = $last_network_scan =~ /^\d+$/ ? time - $last_network_scan : 100000;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:last_network_scan'            => $last_network_scan, 
				's2:time_since_last_network_scan' => $time_since_last_network_scan, 
			}});
			
			# This is gated on the network scan's own age, not the OUI parse's.
			if ($time_since_last_network_scan > 86400)
			{
				# Yup.
				my $variable_uuid = $anvil->Database->insert_or_update_variables({
					variable_name         => "jobs::last-ran::striker-scan-network", 
					variable_value        => time, 
					variable_default      => "0", 
					variable_description  => "striker_0306", 
					variable_section      => "jobs", 
					variable_source_uuid  => "NULL", 
					variable_source_table => "", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
				
				my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
					file            => $THIS_FILE, 
					line            => __LINE__, 
					job_command     => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches, 
					job_data        => "", 
					job_name        => "scan-network::refresh", 
					job_title       => "job_0066", 
					job_description => "job_0067", 
					job_progress    => 0, 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
			}
		}
		
		# Update the next check time.
		$anvil->data->{timing}{next_daily_check} = $now_time + $anvil->data->{timing}{daily_checks};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			"s1:timing::daily_checks"     => $anvil->data->{timing}{daily_checks}, 
			"s2:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, 
		}});
	}
	
	return(0);
}

### TODO: Find the source of the problem and fix it properly.
# Looks for manifests saved with the placeholder name '-anvil-' and deletes each one found from both 
# 'history.manifests' and 'manifests'.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_for_broken_manifests
{
	my ($anvil) = @_;
	
	my $query = " SELECT manifest_uuid FROM manifests WHERE manifest_name = '-anvil-' ;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $results, 
		count   => $count, 
	}});
	if ($count)
	{
		foreach my $row (@{$results})
		{
			my $manifest_uuid = $row->[0];
			my $queries       = [];
			push @{$queries}, "DELETE FROM history.manifests WHERE manifest_uuid = ".$anvil->Database->quote($manifest_uuid).";";
			push @{$queries}, "DELETE FROM manifests WHERE manifest_uuid = ".$anvil->Database->quote($manifest_uuid).";";
			
			# Log each query before writing. The sort only affects the order the queries are 
			# logged in; the array reference handed to write() is not reordered.
			foreach my $query (sort {$a cmp $b} @{$queries})
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0124", variables => { query => $query }});
			}
			$anvil->Database->write({debug => 2, query => $queries, source => $THIS_FILE, line => __LINE__});
		}
	}
	
	return(0);
}

### NOTE: This logic plays out in a slightly different way in Database->shutdown().
# Check for stale db_in_use states.
# 
# This reads the 'db_in_use::<db_uuid>::<pid>[::<caller>]' states recorded for this host and deletes any 
# whose PID is not in the list of PIDs loaded by System->pids().
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_db_in_use_states
{
	my ($anvil) = @_;
	
	# We only reap db_in_use entries for us.
	$anvil->System->pids({debug => 2});
	my $query = " SELECT state_uuid, state_name, state_note FROM states WHERE state_name LIKE 'db_in_use::%' AND state_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)." ;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $results, 
		count   => $count, 
	}});
	if ($count)
	{
		foreach my $row (@{$results})
		{
			my $state_uuid = $row->[0];
			my $state_name = $row->[1];
			my $state_note = $row->[2];
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:state_uuid' => $state_uuid, 
				's2:state_name' => $state_name, 
				's3:state_note' => $state_note, 
			}});
			
			# The state name holds the database's host UUID, then the PID and, optionally, the 
			# name of the caller after a further '::'.
			my $caller                = "";
			my ($db_uuid, $state_pid) = ($state_name =~ /db_in_use::(.*?)::(.*)$/);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:db_uuid'   => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")", 
				's2:state_pid' => $state_pid, 
			}});
			if ($state_pid =~ /(\d+)::(.*)$/)
			{
				$state_pid = $1;
				$caller    = $2;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					's1:state_pid' => $state_pid, 
					's2:caller'    => $caller, 
				}});
			}
			
			if (not exists $anvil->data->{pids}{$state_pid})
			{
				# Reap the 'db_is_use'.
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { state_name => $state_name }});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0140", variables => { 
					db       => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")", 
					pid      => $state_pid, 
					'caller' => $caller, 
				}});
				
				my $query = "DELETE FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";";
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
				$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__});
			}
		}
	}
	
	return(0);
}

# This checks to see if any files in /mnt/shared need to be dealt with, like incorporating files in 
# /mnt/shared/incoming, etc.
# 
# It does so by calling 'anvil-manage-files --check'; the output and return code are only logged.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_incoming
{
	my ($anvil) = @_;
	
	my $shell_call = $anvil->data->{path}{exe}{'anvil-manage-files'}." --check".$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({
		shell_call => $shell_call, 
		source     => $THIS_FILE, 
		line       => __LINE__, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output, 
		return_code => $return_code, 
	}});
	
	return(0);
}

# This calls striker-manage-install-target to see if the dhcpd is running or not. If it is or isn't, the config 
# variable 'install-target::enabled' is set/updated. On non-Striker hosts, this simply returns without doing 
# anything.
# 
# The recorded value is 'enabled' when the tool reports 'status=1', 'disabled' for 'status=0' and 
# 'unavailable' otherwise.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_install_target
{
	my ($anvil) = @_;
	
	my $system_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { system_type => $system_type }});
	if ($system_type ne "striker")
	{
		# Not a dashboard, nothing to do.
		return(0);
	}
	
	my $status                 = "unavailable";
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --status --check --no-refresh".$anvil->Log->switches});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output }});
	foreach my $line (split/\n/, $output)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		
		# Only the first 'status=X' line is used.
		if ($line =~ /status=(\d)/)
		{
			my $digit = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { digit => $digit }});
			
			if ($digit == 0)
			{
				$status = "disabled";
			}
			elsif ($digit == 1)
			{
				$status = "enabled";
			}
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status => $status }});
			last;
		}
	}
	
	# Record the status
	$anvil->Database->insert_or_update_variables({
		variable_name         => "install-target::enabled",
		variable_source_uuid  => $anvil->Get->host_uuid,
		variable_source_table => "hosts",
		variable_value        => $status,
		variable_default      => "unavailable",
		variable_description  => "striker_0110",
		variable_section      => "system",
	});
	
	return(0);
}

# These are tools that don't need to constantly run. They'll typically run when the server starts up or the 
# daemon is restarted or reloaded.
# 
# If the 'startup-only' switch is set, the program exits with exit code '0' once these tasks are done.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Returns '0' (when it returns at all).
sub run_once
{
	my ($anvil) = @_;
	
	# Make sure the firewall is configured.
	$anvil->Network->manage_firewall();
	
	# Check that the database is ready.
	prep_database($anvil);
	
	# Check to see if we need to do boot-time tasks. We only run these if we've just booted
	boot_time_tasks($anvil);
	
	# Check the ssh stuff.
	# NOTE: This actually runs again in the minutes tasks, but needs to run on boot as well.
	$anvil->System->check_ssh_keys();
	
	# Check setuid wrappers
	check_setuid_wrappers($anvil);
	
	# Check journald is configured for persistent storage.
	check_journald($anvil);
	
	if ($anvil->data->{switches}{'startup-only'})
	{
		$anvil->nice_exit({exit_code => 0});
	}
	
	return(0);
}

# This makes sure journald is configured for persistent storage. It creates the journald directory if it 
# is missing and, if 'Storage=persistent' is not already set in journald.conf, rewrites the file with that 
# setting and restarts 'systemd-journald.service'.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_journald
{
	my ($anvil) = @_;
	
	# Check the journald.conf to ensure logging in configured to be persistent.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::configs::journald.conf' => $anvil->data->{path}{configs}{'journald.conf'} }});
	my $peristent_seen = 0;
	my $change_storage = 0;
	
	# NOTE(review): The value returned by read_file() is used as the file's body without checking 
	#               whether the read succeeded - confirm what read_file() returns on failure.
	my $old_journald_conf = $anvil->Storage->read_file({file => $anvil->data->{path}{configs}{'journald.conf'}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_journald_conf => $old_journald_conf }});
	foreach my $line (split/\n/, $old_journald_conf)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		if ($line =~ /^Storage=(.*)$/)
		{
			my $value = $1;
			if ($value eq "persistent")
			{
				$peristent_seen = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { peristent_seen => $peristent_seen }});
			}
			else
			{
				# An active 'Storage=' line with another value exists and needs replacing.
				$change_storage = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { change_storage => $change_storage }});
			}
		}
	}
	
	# Make sure the journald directory exists.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::directories::journald' => $anvil->data->{path}{directories}{journald} }});
	if (not -d $anvil->data->{path}{directories}{journald})
	{
		$anvil->Storage->make_directory({
			debug     => 2,
			directory => $anvil->data->{path}{directories}{journald},
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0248", variables => { directory => $anvil->data->{path}{directories}{journald} }});
	}
	
	# Make sure the journald is configured for persistent storage.
	if (not $peristent_seen)
	{
		my $storage_added     = 0;
		my $new_journald_conf = "";
		foreach my $line (split/\n/, $old_journald_conf)
		{
			# Replace the first active 'Storage=' line and drop any later ones.
			if (($line =~ /^Storage=/) && ($change_storage))
			{
				if (not $storage_added)
				{
					$storage_added     =  1;
					$new_journald_conf .= "Storage=persistent\n";
				}
				next;
			}
			
			# Otherwise, insert the setting just above the first commented-out '#Storage=' line, 
			# which is itself kept.
			if (($line =~ /^#Storage=/) && (not $storage_added))
			{
				$storage_added     =  1;
				$new_journald_conf .= "Storage=persistent\n";
			}
			$new_journald_conf .= $line."\n";
		}
		
		# No suitable line was found, so append the setting.
		if (not $storage_added)
		{
			$new_journald_conf .= "Storage=persistent\n";
		}
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { new_journald_conf => $new_journald_conf }});
		
		$anvil->Storage->write_file({
			debug     => 3,
			secure    => 0, 
			file      => $anvil->data->{path}{configs}{'journald.conf'}, 
			body      => $new_journald_conf, 
			mode      => "0644",
			overwrite => 1,
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0013", variables => { file => $anvil->data->{path}{configs}{'journald.conf'} }});
		
		# Restart the journald service.
		my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart systemd-journald.service";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			return_code => $return_code, 
		}});
	}
	
	return(0);
}

# This creates, as needed, the setuid wrappers used by striker-ui-api to make certain system calls.
# 
# On a Striker, if the 'call_striker-get-peer-data' wrapper does not exist, a small C program is written 
# out, compiled with gcc, and the resulting binary is set to 'root:root' with mode '4755'. Nothing is done 
# on other host types, or if the 'admin' user or group can't be found.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_setuid_wrappers
{
	my ($anvil) = @_;
	
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		# Not a dashboard, setuid scripts aren't needed.
		return(0);
	}
	
	# Does the call_striker-get-peer-data wrapper exist yet?
	if (-e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
	{
		# Exists, skipping.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0436", variables => { wrapper => $anvil->data->{path}{exe}{'call_striker-get-peer-data'} }});
	}
	else
	{
		# What is the admin user and group ID?
		my $admin_uid = getpwnam('admin');
		my $admin_gid = getgrnam('admin');
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
			admin_uid => $admin_uid,
			admin_gid => $admin_gid, 
		}});
		
		# Without both IDs the wrapper can't be built. We're not inside a loop here, so we return 
		# instead of using 'next' (which would either be fatal or act on a loop in the caller).
		if ((not $admin_uid) or (not $admin_gid))
		{
			return(0);
		}
		
		# Write the body out
		# NOTE(review): The generated C uses a K&R-style main() without headers, and calls setuid() 
		#               before setgid(). After a successful setuid() to a non-root user, setgid() 
		#               would normally no longer be permitted - confirm this ordering is intended.
		my $call_striker_get_peer_data_body =  "#define REAL_PATH \"".$anvil->data->{path}{exe}{'striker-get-peer-data'}."\"\n";
		   $call_striker_get_peer_data_body .= "main(ac, av)\n";
		   $call_striker_get_peer_data_body .= "char **av;\n";
		   $call_striker_get_peer_data_body .= "{\n";
		   $call_striker_get_peer_data_body .= " setuid(".$admin_uid.");\n";
		   $call_striker_get_peer_data_body .= " setgid(".$admin_gid.");\n";
		   $call_striker_get_peer_data_body .= " execv(REAL_PATH, av);\n";
		   $call_striker_get_peer_data_body .= "}\n";
		my $error = $anvil->Storage->write_file({
			debug     => 3,
			file      => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c",
			body      => $call_striker_get_peer_data_body,
			mode      => '644',
			overwrite => 1,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { error => $error }});
		
		# If it wrote out, compile it.
		if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c")
		{
			# Failed to write.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0071", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
		}
		else
		{
			# Compile it
			my ($output, $return_code) = $anvil->System->call({
				debug      => 3,
				shell_call => $anvil->data->{path}{exe}{gcc}." -o ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}." ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c",
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				output      => $output,
				return_code => $return_code, 
			}});
			
			# If it compiled, setuid it.
			if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
			{
				# Something went wrong compiling it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0072", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
			}
			else
			{
				$anvil->Storage->change_owner({
					debug => 3,
					path  => $anvil->data->{path}{exe}{'call_striker-get-peer-data'},
					user  => 'root',
					group => 'root',
				});
				$anvil->Storage->change_mode({
					debug => 3,
					path  => $anvil->data->{path}{exe}{'call_striker-get-peer-data'},
					mode  => '4755',
				});
			}
		}
	}
	
	return(0);
}

# Configure/update the firewall.
# 
# NOTE: This is currently disabled; it returns '0' immediately and the code below the first 'return' is 
#       never reached.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub check_firewall
{
	my ($anvil) = @_;
	return(0);
	
	# Don't call this if we're not configured yet.
	my $configured = $anvil->System->check_if_configured({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	
	# Check the firewall needs to be updated.
	if ($configured)
	{
		my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'anvil-manage-firewall'}.$anvil->Log->switches});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code }});
	}
	
	return(0);
}

# This handles tasks that need to run on boot (if any)
# 
# If a reboot was flagged as needed and the host has booted since the flag was set, the flag is cleared 
# and any incomplete 'anvil-manage-power' job is marked as complete. If no reboot was needed, the host is 
# marked 'online' and its stop reason is cleared. After that, /etc/hosts is updated, special cases are 
# handled, start-up jobs are run and the firewall check is called.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub boot_time_tasks
{
	my ($anvil) = @_;
	
	# Get the uptime, used below to work out if we've rebooted since a reboot was requested.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
	
	# Now find out if a reboot is listed as needed and when it was last changed.
	my $reboot_needed       = 0;
	my $changed_seconds_ago = 0;
	my $query               = " SELECT variable_value, (SELECT extract(epoch from now()) - extract(epoch from modified_date)) AS changed_seconds_ago FROM variables WHERE variable_source_table = 'hosts' AND variable_source_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)." AND variable_name = 'reboot::needed' ;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $results, 
		count   => $count, 
	}});
	if ($count)
	{
		$reboot_needed       =  $results->[0]->[0];
		$changed_seconds_ago =  $results->[0]->[1];
		
		# Drop the fractional seconds.
		$changed_seconds_ago =~ s/^(\d+)\..*$/$1/;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			reboot_needed       => $reboot_needed, 
			changed_seconds_ago => $changed_seconds_ago, 
		}});
	}
	
	### TODO: This shouldn't be needed anymore. anvil-manage-power doesn't set the progress to '50' prior 
	###       to reboot anymore.
	# If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was
	# set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to
	# Lisa Seelye (@thedoh) for this idea!
	my $difference = ($changed_seconds_ago - $uptime);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"s1:reboot_needed"       => $reboot_needed,
		"s2:changed_seconds_ago" => $changed_seconds_ago, 
		"s3:uptime"              => $uptime, 
		"s4:difference"          => $difference, 
	}});
	if ($reboot_needed)
	{
		if ($uptime < $changed_seconds_ago)
		{
			# Clear the reboot request.
			$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
			
			# Check to see if there was a reboot job in progress. If so, finish it off.
			my $job_uuid = $anvil->Job->get_job_uuid({
				debug      => 2, 
				program    => "anvil-manage-power", 
				incomplete => 1,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
			
			if ($job_uuid)
			{
				# Update the percentage to '100' and then clear the old PID.
				my $date_time = $anvil->Get->date_and_time();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { date_time => $date_time }});
				
				$anvil->Job->update_progress({
					progress     => 100, 
					message      => "message_0064,!!date_and_time!".$date_time."!!", 
					job_uuid     => $job_uuid,
					picked_up_by => 0, 
				});
			}
		}
	}
	else
	{
		# Update our status
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0572"});
		
		$anvil->Database->get_hosts({debug => 2});
		my $host_uuid = $anvil->Get->host_uuid({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_uuid => $host_uuid }});
		$anvil->Database->insert_or_update_hosts({
			debug       => 2, 
			host_ipmi   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_ipmi}, 
			host_key    => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_key}, 
			host_name   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name}, 
			host_type   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}, 
			host_uuid   => $host_uuid, 
			host_status => "online", 
		});
		
		# Make sure our stop reason is cleared.
		my $variable_uuid = $anvil->Database->insert_or_update_variables({
			variable_name         => 'system::stop_reason', 
			variable_value        => '', 
			variable_default      => '', 
			variable_description  => 'striker_0279', 
			variable_section      => 'system', 
			variable_source_uuid  => $host_uuid, 
			variable_source_table => 'hosts', 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
	}
	
	# Make sure /etc/hosts is updated.
	$anvil->System->update_hosts({debug => 2});
	
	# This handles weird bits for things like bug work-arounds.
	handle_special_cases($anvil);
	
	# Now look for jobs that have a job status of 'anvil_startup'
	run_jobs($anvil, 1);
	
	# Check the firewall needs to be updated.
	check_firewall($anvil);
	
	return(0);
}

# This handles weird bits for things like bug work-arounds.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub handle_special_cases
{
	my ($anvil) = @_;
	
	# This is now handled by 'anvil-version-changes'
	my $shell_call = $anvil->data->{path}{exe}{'anvil-version-changes'}.$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	my ($states_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		states_output => $states_output, 
		return_code   => $return_code, 
	}});
	
	return(0);
}

# Configure the local database, if needed.
# 
# On a Striker, the database is configured unless a dump file exists for any configured database. If a 
# dump file exists and there are no database connections, the 'postgresql' daemon is started if it is 
# installed but not running. Nothing is done on other host types.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub prep_database
{
	my ($anvil) = @_;
	
	# If there's a backup file, we're configured and possibly just off.
	my $prep_database = 1;
	foreach my $uuid (keys %{$anvil->data->{database}})
	{
		my $dump_file =  $anvil->data->{path}{directories}{pgsql}."/anvil_db_dump.".$uuid.".sql";
		   $dump_file =~ s/\/\//\//g;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
		if (-e $dump_file)
		{
			# No need to prepare.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0665", variables => { file => $dump_file }});
			$prep_database = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { prep_database => $prep_database }});
		}
	}
	
	# Only run this if we're a dashboard.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type eq "striker")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			prep_database                => $prep_database, 
			"sys::database::connections" => $anvil->data->{sys}{database}{connections}, 
		}});
		if ($prep_database)
		{
			$anvil->Database->configure_pgsql({debug => 2});
			
# 			### NOTE: This failed once, in case / until it happens again, we'll force log level 2 and secure logging.
# 			my $shell_call = $anvil->data->{path}{exe}{'striker-prep-database'}$anvil->Log->switches;
# 			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
# 			my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__ });
# 			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
# 				database_output => $database_output, 
# 				return_code     => $return_code, 
# 			}});
		}
		elsif (not $anvil->data->{sys}{database}{connections})
		{
			# Start the daemon locally, if needed.
			my $running = $anvil->System->check_daemon({daemon => "postgresql"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }});
			if ($running == 2)
			{
				# Not installed, nothing to do.
			}
			elsif (not $running)
			{
				# Start it.
				my $return_code = $anvil->System->start_daemon({daemon => "postgresql"});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
			}
		}
	}
	
	return(0);
}

# These are tools that need to keep running.
# 
# This polls the handles of jobs started earlier (clearing any that have exited), regenerates the state 
# JSON if this host is configured, and then calls run_jobs() to start any pending jobs.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Always returns '0'.
sub keep_running
{
	my ($anvil) = @_;
	
	# Check for jobs that were running and now exited.
	# NOTE(review): This is gated on 'processes' existing, while the handles live under 
	#               'jobs::handles' - confirm that 'processes' is set somewhere.
	if (exists $anvil->data->{processes})
	{
		# Walk the job UUIDs only; iterating the hash itself would also yield the handles as if they 
		# were job UUIDs.
		foreach my $job_uuid (keys %{$anvil->data->{jobs}{handles}})
		{
			# If it's not a handle, delete it.
			if (not ref($anvil->data->{jobs}{handles}{$job_uuid}))
			{
				delete $anvil->data->{jobs}{handles}{$job_uuid};
				next;
			}
			
			my $running = $anvil->data->{jobs}{handles}{$job_uuid}->poll();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
				running                      => $running,
			}});
			
			# If it's not running, update the table to clear the 'job_picked_up_by' column.
			if (not $running)
			{
				my $exit_status = $anvil->data->{jobs}{handles}{$job_uuid}->exit_status();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 
					job_uuid    => $job_uuid,
					exit_status => $exit_status,
				}});
				
				# Free up memory
				$anvil->data->{jobs}{handles}{$job_uuid}->cleanup();
				
				$anvil->Job->clear({job_uuid => $job_uuid});
			}
		}
	}
	
	# If we're configured, write out the status JSON file. If we're not configured, Update hardware state files.
	my $configured = $anvil->System->check_if_configured;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	if ($configured)
	{
		# Write out state information for all known Anvil! systems and the information from 
		# unconfigured nods and DR hosts, using just database data (hence, fast enough to run 
		# constantly).
		$anvil->System->generate_state_json({debug => 2});
	}
	
	# Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
	run_jobs($anvil, 0);
	
	return(0);
}

# This will check for any jobs that aren't at 100%. For each found, if 'picked_up_by' is set, a check is made # to see if the PID is still alive.
If it isn't, or if 'picked_up_by' is not set, the appropriate tool is # invoked to handle it. sub run_jobs { my ($anvil, $startup) = @_; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup => $startup }}); # Don't start jobs for 30 seconds after startup. if (not $startup) { my $time_since_start = time - $start_time; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { time_since_start => $time_since_start, start_time => $start_time, }}); if ($time_since_start < 60) { # Log that we'll start jobs in X seconds. my $will_start_in = 60 - $time_since_start; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "message_0326", variables => { will_start_in => $will_start_in }}); return(0); } } # This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's # changed on disk. $anvil->data->{sys}{jobs_running} = 0; # We'll also update the jobs.json file. my $jobs_file = "{\"jobs\":[\n"; # Get a list of pending or incomplete jobs. my $ended_within = $startup ? 1 : 300; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ended_within => $ended_within }}); $anvil->Database->get_jobs({ debug => 2, ended_within => $ended_within, }); foreach my $modified_date (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { modified_date => $modified_date }}); foreach my $job_uuid (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}}) { # Reload the jobs so we get an updated view of them. $anvil->Database->get_jobs({ debug => 2, ended_within => $ended_within, }); # Collect the data. 
my $job_command = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_command}; my $short_command = $job_command; $short_command =~ s/\s.*$//; my $job_data = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_data}; my $job_picked_up_by = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_picked_up_by}; my $job_picked_up_at = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_picked_up_at}; my $job_updated = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_updated}; my $job_name = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_name}; my $job_progress = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_progress}; my $job_title = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_title}; my $job_description = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_description}; my $job_status = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_status}; my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0; my $updated_seconds_ago = $job_updated ? 
(time - $job_updated) : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 's01:job_uuid' => $job_uuid, 's02:job_command' => $job_command, 's03:short_command' => $short_command, 's04:job_data' => $job_data, 's05:job_picked_up_by' => $job_picked_up_by, 's06:job_picked_up_at' => $job_picked_up_at, 's07:job_updated' => $job_updated, 's08:job_name' => $job_name, 's09:job_progress' => $job_progress, 's10:job_title' => $job_title, 's11:job_description' => $job_description, 's12:job_status' => $job_status, 's13:started_seconds_ago' => $started_seconds_ago, 's14:updated_seconds_ago' => $updated_seconds_ago, }}); # If we're not configured, we will only run the 'anvil-configure-host' job my $configured = $anvil->System->check_if_configured; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { configured => $configured }}); if ((not $configured) && ($job_command !~ /anvil-configure-host/)) { next; } # To minimize the chance of race conditions, any given command will be called only # once at a time. If two jobs of the same command exist, only one will be called. if ($job_progress != 100) { if (exists $anvil->data->{sys}{started}{$short_command}) { # Skip it. my $started_job = $anvil->data->{sys}{started}{$short_command}; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0737", variables => { started_job => $started_job, job_uuid => $job_uuid, command => $short_command, }}); next; } $anvil->data->{sys}{started}{$short_command} = $job_uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::started::${short_command}" => $anvil->data->{sys}{started}{$short_command} }}); } # If this is a start-up call, only start jobs whose status is 'anvil_startup'. 
if (($startup) && ($configured) && ($job_status ne "anvil_startup")) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => { job_uuid => $job_uuid, job_command => $job_command, }}); next; } if ($job_progress == 100) { # This is a job that might have just completed, clear the started value. $anvil->data->{jobs}{$job_uuid}{started} = 0; $job_picked_up_at = 0; $job_picked_up_by = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_at => $job_picked_up_at, job_picked_up_by => $job_picked_up_by, "jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started}, }}); } else { $anvil->data->{sys}{jobs_running} = 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }}); } # See if the job was picked up by a now-dead instance. if ($job_picked_up_by) { # Check if the PID is still active. $anvil->System->pids({ignore_me => 1}); ### TODO: Add a check to verify the job isn't hung. # Skip if this job is in progress. if (not exists $anvil->data->{pids}{$job_picked_up_by}) { # If the job is done, just clear the 'job_picked_up_by' and be done. if ($job_progress ne "100") { # It's possible that the job updated to 100% and exited after # we gathered the job data, so we won't restart until we've # seen it not running and not at 100% after 5 loops. if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid})) { $anvil->data->{lost_job_count}{$job_uuid} = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); } if ($anvil->data->{lost_job_count}{$job_uuid} > 5) { # The previous job is gone, but the job isn't # finished. Start it again. 
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0007", variables => { command => $job_command, pid => $job_picked_up_by, percent => $job_progress, }}); # Clear some variables. $job_progress = 0; $job_status = "message_0056"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_progress => $job_progress, job_status => $job_status, }}); # Clear the job. $anvil->Job->clear({debug => 2, job_uuid => $job_uuid}); $anvil->data->{lost_job_count}{$job_uuid} = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); } else { $anvil->data->{lost_job_count}{$job_uuid}++; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); } } # Clear the PID $job_picked_up_by = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }}); } elsif ($job_progress ne "100") { # The job is running. $anvil->data->{jobs_started}{$short_command} = $job_uuid; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }}); } } # Convert the double-banged strings into a proper message. my $say_title = $job_title ? $anvil->Words->parse_banged_string({debug => 2, key_string => $job_title}) : ""; my $say_description = $job_description ? $anvil->Words->parse_banged_string({debug => 2, key_string => $job_description}) : ""; my $say_status = $job_status ? 
$anvil->Words->parse_banged_string({debug => 2, key_string => $job_status}) : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { job_title => $job_title, say_description => $say_description, say_status => $say_status, }}); # Make the status HTML friendly. Strip any embedded HTML then encode the text string. if ($say_status) { my $html_strip = HTML::Strip->new(); $say_status = $html_strip->parse($say_status); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }}); # Now make the resulting text string HTML friendly my $text_to_html = HTML::FromText->new({ urls => 1, email => 1, lines => 1, }); $say_status = $text_to_html->parse($say_status); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }}); } # Add this to the jobs.json file my $json_string = to_json ({ job_uuid => $job_uuid, job_command => $job_command, job_data => $job_data, job_picked_up_at => $job_picked_up_at, job_updated => $job_updated, job_name => $job_name, job_progress => $job_progress, job_title => $say_title, job_description => $say_description, job_status => $say_status, started_seconds_ago => $started_seconds_ago, updated_seconds_ago => $updated_seconds_ago, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { json_string => $json_string }}); $jobs_file .= $json_string.",\n"; # If the job is done, move on. next if $job_progress == 100; next if $anvil->data->{switches}{'no-start'}; # If 'startup' is set, we only care if 'job_status' is 'anvil_startup' if ((not $startup) && ($say_status eq "anvil_startup")) { # Skip this, it will run next time anvil-daemon restarts. 
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => { command => $job_command, job_uuid => $job_uuid, }}); next; } # If the job is not running, and we've not started any other of the same command this # loop, start it. if (not $job_picked_up_by) { if (exists $anvil->data->{jobs_started}{$short_command}) { # Is the job_uuid associated with this command done? my $started_job_uuid = $anvil->data->{jobs_started}{$short_command}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { started_job_uuid => $started_job_uuid }}); if (exists $anvil->data->{jobs}{running}{$started_job_uuid}) { # If the previously running job and this job have the same # UUID, it failed and needs to restart. $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid, started_job_uuid => $started_job_uuid, "jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress}, }}); if ($started_job_uuid eq $job_uuid) { # We're restarting. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { command => $job_command, job_uuid => $job_uuid, }}); } elsif ($anvil->data->{jobs}{running}{$started_job_uuid}{job_progress} != 100) { # Don't start it in this pass. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { command => $job_command, this_job_uuid => $job_uuid, other_job_uuid => $started_job_uuid, }}); next; } else { # The previous job is done, delete it. $anvil->data->{jobs_started}{$short_command} = ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command}, }}); } } } my $command = $job_command." 
--job-uuid ".$job_uuid; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }}); # Have we started this job recently? if (exists $anvil->data->{jobs}{$job_uuid}{started}) { my $last_start = time - $anvil->data->{jobs}{$job_uuid}{started}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { last_start => $last_start }}); if ($last_start < 60) { # Skip, Started too recently. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0578", variables => { command => $command, last_start => $last_start, }}); next; } } # Start the job, appending '--job-uuid' to the command. ($anvil->data->{jobs}{handles}{$job_uuid}, my $return_code) = $anvil->System->call({ background => 1, stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout", stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr", shell_call => $command, source => $THIS_FILE, line => __LINE__, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, return_code => $return_code, }}); # Log the PID (the job should update the database). my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }}); # Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute. 
$anvil->data->{jobs}{$job_uuid}{started} = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'jobs::$job_uuid::started' => $anvil->data->{jobs}{$job_uuid}{started} }});

# Record that a job with this command has started
$anvil->data->{jobs_started}{$short_command} = $job_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }});
			}
		}
	}
	
	# Close the jobs file.
	$jobs_file =~ s/,\n$/\n/ms;
	$jobs_file .= "]}\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { jobs_file => $jobs_file }});
	
	# Write the JSON file
	my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_xml => $output_json }});
	$anvil->Storage->write_file({
		file      => $output_json, 
		body      => $jobs_file, 
		overwrite => 1, 
		backup    => 0,
		mode      => "0644", 
		user      => "striker-ui-api", 
		group     => "striker-ui-api",
	});
	
	return(0);
}

# Walks every entry under 'path::directories::shared' and makes sure the directory it names is present with
# mode 0777, then hands off to 'Storage->check_files' to reconcile tracked files.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle used for data access, logging and storage calls.
# 
# Returns '0' in all cases. A failure to create a directory is logged (key 'log_0254'), not returned.
sub check_files
{
	my ($anvil) = @_;
	
	# Process the shared directories in (string) sorted name order.
	for my $share_name (sort keys %{$anvil->data->{path}{directories}{shared}})
	{
		my $share_path = $anvil->data->{path}{directories}{shared}{$share_name};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			target    => $share_name, 
			directory => $share_path,
		}});
		
		if (-e $share_path)
		{
			# The path is already there. Refresh its stat data and, if the recorded mode doesn't
			# end in '0777', change it.
			$anvil->Storage->get_file_stats({file_path => $share_path});
			my $current_mode = $anvil->data->{file_stat}{$share_path}{unix_mode};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				"file_stat::${share_path}::unix_mode" => $current_mode, 
			}});
			
			unless ($current_mode =~ /0777$/)
			{
				$anvil->Storage->change_mode({
					debug => 2, 
					path  => $share_path, 
					mode  => "0777",
				});
			}
		}
		elsif (not -e $share_path)
		{
			# The path is missing, so create it owned by the 'striker-ui-api' user and group.
			my $create_failed = $anvil->Storage->make_directory({
				directory => $share_path, 
				group     => "striker-ui-api", 
				user      => "striker-ui-api", 
				mode      => "0777", 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { failed => $create_failed }});
			
			if (not $create_failed)
			{
				# The directory was created.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0255", variables => { 
					directory => $share_path, 
				}});
			}
			else
			{
				# The create call reported a problem; log it at alert priority.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0254", variables => { 
					directory => $share_path, 
				}});
			}
		}
	}
	
	# Have the Storage module compare the files recorded in 'file_locations' against what is on this
	# system; files shown as ready should be present, and files marked not ready are re-checked.
	$anvil->Storage->check_files({debug => 2});
	
	return(0);
}