You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1955 lines
77 KiB
1955 lines
77 KiB
#!/usr/bin/perl |
|
# |
|
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster |
|
# nodes and DR hosts. |
|
# |
|
# Exit codes; |
|
# 0 = Normal exit or md5sum of this program changed and it exited to reload. |
|
# 1 = Not running as root. |
|
# 2 = Unable to connect to any database, even after trying to initialize the local system. |
|
# |
|
# TODO: |
|
# - Need to check what kind of machine this is and not prep the database unless its a dashboard. |
|
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs |
|
# - Decide if holding before the main loop until 'systemctl is-system-running' returns 'running' is a good |
|
# idea or not. |
|
# - Write the status of this and the scancore daemon to /etc/anvil/anvil.motd and symlink it to /etc/motd.d/ |
|
# - Write a script that runs in crontab at UTC 17:00 that sends an email if Scancore or anvil-daemon are disabled. |
|
# - Examine limits in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU=
|
# - Write a background program to scan the BCN and uses OUI data to try and find / auto-configure PDUs and UPSes |
|
# - |
|
# - Increase DRBD's default timeout |
|
# - Check for and enable persistent journald logging |
|
# |
|
# NOTE: |
|
# - For later; 'reboot --force --force' immediately kills the OS, like disabling ACPI on EL6 and hitting the |
|
# power button. Might be useful in ScanCore down the road. |
|
# |
|
|
|
use strict; |
|
use warnings; |
|
use Anvil::Tools; |
|
use Proc::Simple; |
|
#use Time::HiRes qw ( time sleep ); |
|
use JSON; |
|
use HTML::Strip; |
|
use HTML::FromText; |
|
use Data::Dumper; |
|
use Text::Diff; |
|
|
|
# Derive this program's own file name and the directory it was launched from. These are used for
# logging ('source') and for the later md5sum self-check that triggers a restart when this file changes.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Invoked via a relative path; anchor it to the shell's current working directory.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

# Prevent a discrepancy between UID/GID and EUID/EGID from throwing an error.
$< = $>;
$( = $);
|
|
|
# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
#       in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();

# Make sure we're running as 'root'.
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root; print the translated error and exit with rc 1 (see the exit code list above).
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# If, for some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();

# If dnf is running, hold until it finishes so we don't race a package transaction.
$anvil->System->wait_on_dnf();

# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect({
	check_if_configured => 1,
	check_for_resync    => 1,
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});

# If I have no databases, sleep for a second and then exit (systemd will restart us).
if (not $anvil->data->{sys}{database}{connections})
{
	# If this is a dashboard, try to configure and then connect to the local database. If this isn't a
	# dashboard, then just go into a loop waiting for a database to be configured.
	if ($anvil->Get->host_type eq "striker")
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0201"});
		prep_database($anvil);

		# Try connecting again now that the local database should exist.
		$anvil->Database->connect({check_if_configured => 1, check_for_resync => 1});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Still nothing, log and exit with rc 2 (no database available).
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
			$anvil->nice_exit({exit_code => 2});
		}
	}
	else
	{
		# Not a dashboard; wait until a database becomes available.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0075"});

		until($anvil->data->{sys}{database}{connections})
		{
			sleep 10;

			# Make sure the network is healthy and the config is re-read before retrying.
			check_network($anvil);
			$anvil->refresh();
			$anvil->Database->connect({check_if_configured => 1, check_for_resync => 1});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
			if (not $anvil->data->{sys}{database}{connections})
			{
				# Still no connection; note it quietly and loop again.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0439"});
			}
		}
	}
}
|
|
|
# Read the command line switches this daemon understands.
$anvil->Get->switches({list => [
	"clear-mapping",
	"refresh-json",
	"run-once",
	"main-loop-only",
	"no-start",
	"startup-only"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# '--refresh-json' implies a single pass of the main loop with no start-up tasks, so force the
# corresponding switches on.
if ($anvil->data->{switches}{'refresh-json'})
{
	$anvil->data->{switches}{'run-once'}       = 1;
	$anvil->data->{switches}{'main-loop-only'} = 1;
	$anvil->data->{switches}{'no-start'}       = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"switches::run-once"       => $anvil->data->{switches}{'run-once'},
		"switches::main-loop-only" => $anvil->data->{switches}{'main-loop-only'},
		"switches::no-start"       => $anvil->data->{switches}{'no-start'},
	}});
}

# This is used to track initial checks / repairs of network issues (see check_network()).
$anvil->data->{sys}{network}{initial_checks} = 0;

# We use this to delay starting jobs for a short time. NOTE: 'our', not 'my', so subroutines can see it.
our $start_time = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }});

# There are some things we only want to run on (re)start and don't need to always run.
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};
|
|
|
# Calculate my sum so that we can exit (and let systemd restart us) if it changes later.
$anvil->Storage->record_md5sums;

# What time is it, Mr. Fox?
my $now_time = time;

# To avoid multiple dashboards running a network scan and OUI parse, the dashboard peer with the lowest
# host_uuid sets its daily checks to run now, and the other(s) will get a two-hour delay.
my $delay = set_delay($anvil);

# Once a minute, we'll check the md5sums and see if we should restart.
# Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
# The 'next_*' values are set just below 'now' so each class of check fires on the first loop pass
# (the daily check additionally honours the start-up delay calculated above).
$anvil->data->{timing}{minute_checks}         = 60;
$anvil->data->{timing}{ten_minute_checks}     = 600;
$anvil->data->{timing}{daily_checks}          = 86400;
$anvil->data->{timing}{repo_update_interval}  = 86400;
$anvil->data->{timing}{next_minute_check}     = $now_time - 1;
$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_daily_check}      = ($now_time + $delay) - 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	"s1:timing::minute_checks"         => $anvil->data->{timing}{minute_checks},
	"s2:timing::ten_minute_checks"     => $anvil->data->{timing}{ten_minute_checks},
	"s3:timing::daily_checks"          => $anvil->data->{timing}{daily_checks},
	"s4:timing::repo_update_interval"  => $anvil->data->{timing}{repo_update_interval},
	"s5:now_time"                      => $now_time,
	"s6:delay"                         => $delay,
	"s7:timing::next_minute_check"     => $anvil->data->{timing}{next_minute_check},
	"s8:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
	"s9:timing::next_daily_check"      => $anvil->data->{timing}{next_daily_check},
}});

# Disconnect. We'll reconnect inside the loop.
$anvil->Database->disconnect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0203"});

# This will prevent restarting while jobs are running.
$anvil->data->{sys}{jobs_running} = 0;

# When we periodically check if system files have changed, we'll also ask Database->connect() to check if it
# needs to be configured or updated. This is done periodically as it is expensive to run on every loop.
# NOTE: lexical here, but visible to handle_periodic_tasks() below, which sets it back to 1 once a minute.
my $check_if_database_is_configured = 0;
|
|
|
# These are the things we always want running; the daemon's main loop.
while(1)
{
	# Reload defaults, re-read the config and then connect to the database(s).
	$anvil->refresh();

	# If, for some reason, anvil.conf is lost, create it.
	$anvil->System->_check_anvil_conf();

	# Only ask connect() to re-verify the DB configuration when the minute-check flagged it; the
	# check is too expensive to run on every two-second pass.
	$anvil->Database->connect({check_if_configured => $check_if_database_is_configured, check_for_resync => 1});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});

	# Mark that we don't want to check the database now.
	$check_if_database_is_configured = 0;

	# If this host is mapping the network, we'll skip a lot of stuff. check_if_mapping() clears a
	# stale flag after a day (or when '--clear-mapping' is used).
	$anvil->data->{sys}{mapping_network} = check_if_mapping($anvil);

	if ($anvil->data->{sys}{database}{connections})
	{
		# Run the normal tasks.
		keep_running($anvil);

		# Handle periodic (minute / ten-minute / daily) tasks.
		handle_periodic_tasks($anvil) if not $anvil->data->{sys}{mapping_network};
	}
	else
	{
		# No databases available, we'll update the state file in case this host is having its
		# network mapped and the interface used to talk to the databases went down. That's all we
		# can do though.
		update_state_file($anvil);
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "log_0202"});
	}

	# Exit if 'run-once' selected.
	if ($anvil->data->{switches}{'run-once'})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0055"});
		$anvil->nice_exit({exit_code => 0});
	}

	# Check how much RAM we're using; check_ram() exits the daemon if we've grown too large.
	check_ram($anvil);

	# Disconnect from the database(s) and sleep now.
	$anvil->Database->disconnect();
	sleep(2);
}

# Not reachable in practice (the loop above only ends via nice_exit), kept as a safety net.
$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
############################################################################################################# |
|
# Functions # |
|
############################################################################################################# |
|
|
|
# If this daemon is using too much RAM, register an alert and exit (rc 0, so systemd restarts us).
# If a job is actively running, the exit is deferred, as the job may legitimately inflate our memory use.
sub check_ram
{
	my ($anvil) = @_;

	# Problem 0 == ok, 1 == too much ram used, 2 == no pid found
	my ($ram_problem, $bytes_used) = $anvil->System->check_ram_use({program => $THIS_FILE});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		problem  => $ram_problem,
		ram_used => $anvil->Convert->add_commas({number => $bytes_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $bytes_used}).")",
	}});

	# Memory use is within bounds; nothing more to do.
	return(0) if not $ram_problem;

	# See if any jobs are running, and if so, hold off because those jobs might be doing things
	# (like OS updates or file syncs) that could make anvil-daemon appear to be using more memory.
	$anvil->Database->get_jobs({debug => 2});
	foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}})
	{
		my $job_command  = $anvil->data->{jobs}{running}{$job_uuid}{job_command};
		my $job_progress = $anvil->data->{jobs}{running}{$job_uuid}{job_progress};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			job_command  => $job_command,
			job_progress => $job_progress,
		}});

		# Progress 0 (not started) or 100 (finished) doesn't block the restart.
		next if (($job_progress == 0) or ($job_progress == 100));

		# An active job was found; log why we're not aborting and bail out of the check.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0139", variables => {
			job_command    => $job_command,
			job_progress   => $job_progress,
			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $bytes_used}),
			ram_used_bytes => $anvil->Convert->add_commas({number => $bytes_used}),
		}});
		return(0);
	}

	# No active jobs; send an alert and exit.
	$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
		program        => $THIS_FILE,
		ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $bytes_used}),
		ram_used_bytes => $anvil->Convert->add_commas({number => $bytes_used}),
	}, set_by => $THIS_FILE, sort_position => 0});
	$anvil->Email->send_alerts();

	# Log the same information.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0357", variables => {
		program        => $THIS_FILE,
		ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $bytes_used}),
		ram_used_bytes => $anvil->Convert->add_commas({number => $bytes_used}),
	}});

	# Exit with RC0 so that systemctl restarts us without flagging a failure.
	$anvil->nice_exit({exit_code => 0});

	return(0);
}
|
|
|
# Check to see if we're mapping the network on this host. Returns 1 when mapping mode is active
# (set via the 'config::map_network' database variable against our hosts entry), 0 otherwise.
# A stale flag is cleared after a day, or immediately when '--clear-mapping' was passed.
sub check_if_mapping
{
	my ($anvil) = @_;

	$anvil->data->{sys}{mapping_network} = 0;
	if ($anvil->data->{sys}{database}{connections})
	{
		# Read the mapping flag (and its UUID / modification time) from the database.
		my ($map_network_value, $map_network_uuid, $map_network_mtime, $map_network_modified_date) = $anvil->Database->read_variable({
			debug                 => 3,
			variable_name         => "config::map_network",
			variable_source_table => "hosts",
			variable_source_uuid  => $anvil->data->{sys}{host_uuid},
		});
		# We'll run for a day (should be cancelled by the program when the user's done, so this
		# shouldn't fire in practice).
		my $expire_age      = 86400;
		my $map_network_age = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			's1:map_network_value'         => $map_network_value,
			's2:map_network_mtime'         => $map_network_mtime,
			's3:map_network_modified_date' => $map_network_modified_date,
			's4:map_network_uuid'          => $map_network_uuid,
		}});
		if ($map_network_uuid)
		{
			# The variable exists; work out how long ago it was last modified.
			$map_network_age = time - $map_network_mtime;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { map_network_age => $map_network_age }});
		}
		if ($map_network_value)
		{
			# Mapping is flagged on. How long ago was it set?
			$anvil->data->{switches}{'clear-mapping'} = "" if not defined $anvil->data->{switches}{'clear-mapping'};
			if (($map_network_age >= $expire_age) or ($anvil->data->{switches}{'clear-mapping'}))
			{
				# Expired (or explicitly cleared by switch); clear the flag in the database.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0470"});
				$anvil->Database->insert_or_update_variables({
					debug             => 3,
					variable_value    => 0,
					variable_uuid     => $map_network_uuid,
					update_value_only => 1,
				});
			}
			else
			{
				# Still mapping; mark it so the main loop only tracks the network.
				my $say_age = $anvil->Convert->add_commas({number => $expire_age});
				my $timeout = $anvil->Convert->add_commas({number => ($expire_age - $map_network_age)});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0471", variables => {
					age     => $say_age,
					timeout => $timeout,
				}});

				$anvil->data->{sys}{mapping_network} = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::mapping_network" => $anvil->data->{sys}{mapping_network} }});

				# Close any open ssh connections, in case mapping takes the links down.
				foreach my $ssh_fh_key (keys %{$anvil->data->{cache}{ssh_fh}})
				{
					my $ssh_fh = $anvil->data->{cache}{ssh_fh}{$ssh_fh_key};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
						ssh_fh_key => $ssh_fh_key,
						ssh_fh     => $ssh_fh,
					}});
					if ($ssh_fh =~ /^Net::OpenSSH/)
					{
						# A live Net::OpenSSH handle; disconnect it cleanly.
						$ssh_fh->disconnect();
					}
					delete $anvil->data->{cache}{ssh_fh}{$ssh_fh_key};
				}
			}
		}
	}

	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::mapping_network" => $anvil->data->{sys}{mapping_network} }});
	return($anvil->data->{sys}{mapping_network});
}
|
|
|
# This decides if the local system will delay daily runs on start-up. Non-Striker hosts and the
# database peer whose host_uuid sorts first get no delay; any other Striker waits two hours so
# that only one dashboard performs the daily network scan / OUI parse right away.
sub set_delay
{
	my ($anvil) = @_;

	my $delay     = 7200;
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		# Not a dashboard, don't delay.
		$delay = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
	}
	else
	{
		# Only the database peer whose UUID sorts first is compared; if that's us, skip the delay.
		my ($first_uuid) = sort {$a cmp $b} keys %{$anvil->data->{database}};
		if (defined $first_uuid)
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				"sys::host_uuid" => $anvil->data->{sys}{host_uuid},
				uuid             => $first_uuid,
			}});
			if ($first_uuid eq $anvil->data->{sys}{host_uuid})
			{
				$delay = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { delay => $delay }});
			}
		}
	}

	return($delay);
}
|
|
|
# This checks to see if the network is ok and, if the system has been up long enough, checks and
# tries to repair network issues. It also ensures all users are allowed to send ICMP pings.
# Takes the Anvil::Tools handle; always returns 0.
sub check_network
{
	my ($anvil) = @_;

	# The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such,
	# we'll start watching the network after the uptime is 2 minutes.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
	if ($uptime > 120)
	{
		# Check that bonds are up. Degraded bonds will be left alone.
		if (not $anvil->data->{sys}{network}{initial_checks})
		{
			# First pass; make sure NetworkManager is running and start it if it isn't.
			my $running = $anvil->System->check_daemon({daemon => "NetworkManager"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});

			if (not $running)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }});
				my $return_code = $anvil->System->start_daemon({daemon => "NetworkManager"});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
			}

			#$anvil->Network->check_network({heal => "all"});

			$anvil->data->{sys}{network}{initial_checks} = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				"sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks},
			}});
		}
		else
		{
			### NOTE: This is constantly trying to "fix" healthy bonds, without a known way to
			###       trigger to debug. As such, disabling for now.
			#$anvil->Network->check_network({heal => "down_only"});
		}

		check_firewall($anvil);
	}

	# Check that all users can ping. The kernel only allows unprivileged ICMP for GIDs inside
	# 'net.ipv4.ping_group_range'; read the current range first.
	# (The previous version wrapped this in a pointless 'if (1)' guard and shadowed $shell_call /
	# $output with inner declarations; both removed.)
	my $shell_call = $anvil->data->{path}{exe}{sysctl}." net.ipv4.ping_group_range";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});

	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output }});

	if ($output =~ /net.ipv4.ping_group_range = (\d+)\t(\d+)$/)
	{
		my $lowest_uid  = $1;
		my $highest_uid = $2;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			lowest_uid  => $lowest_uid,
			highest_uid => $highest_uid,
		}});

		if ($highest_uid < 2000)
		{
			# The range is too narrow; tell the user we're enabling ping for all users.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0683"});

			my $set_call = $anvil->data->{path}{exe}{sysctl}." -w net.ipv4.ping_group_range=\"0 2147483647\"";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { shell_call => $set_call }});

			my ($set_output, $set_return_code) = $anvil->System->call({shell_call => $set_call, source => $THIS_FILE, line => __LINE__});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { output => $set_output }});
		}
	}

	return(0);
}
|
|
|
# This handles running tasks that only run on some loops. |
|
sub handle_periodic_tasks |
|
{ |
|
my ($anvil) = @_; |
|
|
|
my $now_time = time; |
|
my $host_type = $anvil->Get->host_type(); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
"s1:now_time" => $now_time, |
|
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, |
|
"s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, |
|
"s4:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, |
|
"s5:host_type" => $host_type, |
|
}}); |
|
|
|
# Time to run once per minute tasks. |
|
if ($now_time >= $anvil->data->{timing}{next_minute_check}) |
|
{ |
|
# Check the firewall needs to be updated. |
|
check_network($anvil); |
|
|
|
# Check to see if the PXE environment needs to be updated. |
|
check_install_target($anvil); |
|
|
|
# Check that the users we care about have ssh public keys and they're recorded in ssh_keys. |
|
$anvil->System->check_ssh_keys({debug => 2}); |
|
|
|
$anvil->System->update_hosts({debug => 3}); |
|
|
|
# Check if the files on disk have changed. Even if it is time to check, don't if a job is |
|
# running. |
|
if ((not $anvil->data->{timing}{jobs_running}) && ($anvil->Storage->check_md5sums)) |
|
{ |
|
# NOTE: We exit with '0' to prevent systemctl from showing a scary red message. |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"}); |
|
$anvil->nice_exit({exit_code => 0}); |
|
} |
|
|
|
# Mark that we want to check the database config next time. |
|
$check_if_database_is_configured = 1; |
|
|
|
# Update the next check time. |
|
$anvil->data->{timing}{next_minute_check} = $now_time + $anvil->data->{timing}{minute_checks}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks}, |
|
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check}, |
|
}}); |
|
|
|
# Even when this runs, it should finish in under ten seconds so we don't need to background it. |
|
my ($parse_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{'anvil-parse-fence-agents'}.$anvil->Log->switches, source => $THIS_FILE, line => __LINE__}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { parse_output => $parse_output }}); |
|
|
|
# Scan the local network. |
|
update_state_file($anvil); |
|
|
|
# Check shared files. |
|
check_files($anvil); |
|
|
|
# Check mail server config. |
|
my $problem = $anvil->Email->check_config({debug => 3}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); |
|
|
|
# Check if anything is needed to be done in /mnt/shared. |
|
check_incoming($anvil); |
|
|
|
# Check for stale db_in_use states. |
|
check_db_in_use_states($anvil); |
|
} |
|
|
|
# Now check to see if it's time to run less frequent tasks. |
|
if ($now_time >= $anvil->data->{timing}{next_ten_minute_check}) |
|
{ |
|
my $host_type = $anvil->Get->host_type(); |
|
my $host_uuid = $anvil->Get->host_uuid(); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
host_type => $host_type, |
|
host_uuid => $host_uuid, |
|
}}); |
|
|
|
# Are we a Striker and is there two or more connections? If so, evaluate if we should shut |
|
# down our database. |
|
if ($host_type eq "striker") |
|
{ |
|
# If we're the active database, dump our database out and rsync it to our peers. |
|
my $peers = keys %{$anvil->data->{database}}; |
|
my $connections = $anvil->data->{sys}{database}{connections}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
peers => $peers, |
|
connections => $connections, |
|
}}); |
|
if (exists $anvil->data->{cache}{database_handle}{$host_uuid}) |
|
{ |
|
# Verify that the database is up. |
|
my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }}); |
|
if ($running) |
|
{ |
|
# Backup our DB. |
|
my $dump_file = $anvil->Database->backup_database({debug => 2}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }}); |
|
|
|
# Now rsync it to our peer(s) |
|
foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}}) |
|
{ |
|
next if $this_host_uuid eq $host_uuid; |
|
|
|
my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/"; |
|
my $password = $anvil->data->{database}{$this_host_uuid}{password}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
this_host_uuid => $this_host_uuid, |
|
destination => $destination, |
|
password => $anvil->Log->is_secure($password), |
|
}}); |
|
|
|
my $start_time = time; |
|
my $failed = $anvil->Storage->rsync({ |
|
debug => 3, |
|
destination => $destination, |
|
password => $password, |
|
source => $dump_file, |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }}); |
|
|
|
my $rsync_time = time - $start_time; |
|
my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}}); |
|
my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}}); |
|
my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid}); |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => { |
|
file => $dump_file, |
|
host_name => $target_name, |
|
took => $rsync_time, |
|
size => $size, |
|
size_bytes => $size_bytes, |
|
}}); |
|
} |
|
} |
|
} |
|
} |
|
|
|
# Reap old db_in_use states over 6 hours old. |
|
my $query = "DELETE FROM states WHERE state_name LIKE 'db_in_use%' AND modified_date < (SELECT now() - interval '6 hour');\n"; |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }}); |
|
$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__}); |
|
|
|
# Update the next check time. |
|
$anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
"s1:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks}, |
|
"s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check}, |
|
}}); |
|
} |
|
|
|
# Now check to see if it's time to run daily tasks. |
|
if ($now_time >= $anvil->data->{timing}{next_daily_check}) |
|
{ |
|
# Make sure ksm, ksmtuned and tuned are disabled. |
|
foreach my $daemon ("ksm.service", "ksmtuned.service", "tuned.service") |
|
{ |
|
my $status = $anvil->System->check_daemon({daemon => $daemon}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
daemon => $daemon, |
|
status => $status, |
|
}}); |
|
if ($status eq "1") |
|
{ |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0145", variables => { daemon => $daemon }}); |
|
$anvil->System->disable_daemon({ |
|
now => 1, |
|
daemon => $daemon, |
|
}); |
|
} |
|
} |
|
|
|
### NOTE: We call it once/day, but this will also trigger on restart of anvil-daemon. As such, we |
|
### don't use '--force' and let striker-manage-install-target skip the repo update if it happened |
|
### recently enough. |
|
if ($host_type eq "striker") |
|
{ |
|
### TODO: This is here only to handle the period of time where we disabled postgres |
|
### on boot. This should be removed sometime after 2022-08-01 |
|
#$anvil->System->enable_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}}); |
|
|
|
# Record a job, don't call it directly. It takes too long to run. |
|
my $host_uuid = $anvil->Get->host_uuid(); |
|
my ($last_age_out, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::aged-out"}); |
|
my $time_since_last_age_out = $last_age_out =~ /^\d+$/ ? time - $last_age_out : 100000; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
's1:host_uuid' => $host_uuid, |
|
's2:last_age_out' => $last_age_out, |
|
's3:time_since_last_age_out' => $time_since_last_age_out, |
|
}}); |
|
|
|
# Run an age-out? |
|
if ($time_since_last_age_out > 86400) |
|
{ |
|
# Age out old data. This takes up to a minute. |
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({ |
|
variable_name => "database::".$host_uuid."::aged-out", |
|
variable_value => time, |
|
variable_default => "0", |
|
variable_description => "striker_0302", |
|
variable_section => "database", |
|
variable_source_uuid => "NULL", |
|
variable_source_table => "", |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); |
|
$anvil->Database->_age_out_data(); |
|
} |
|
|
|
# Run an archive? |
|
my ($last_archive, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::archived"}); |
|
my $time_since_last_archive = $last_archive =~ /^\d+$/ ? time - $last_archive : 100000; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
's1:last_archive' => $last_archive, |
|
's2:time_since_last_archive' => $time_since_last_archive, |
|
}}); |
|
if ($time_since_last_archive > 86400) |
|
{ |
|
# Archive old data |
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({ |
|
variable_name => "database::".$host_uuid."::archived", |
|
variable_value => time, |
|
variable_default => "0", |
|
variable_description => "striker_0303", |
|
variable_section => "database", |
|
variable_source_uuid => "NULL", |
|
variable_source_table => "", |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); |
|
$anvil->Database->archive_database(); |
|
} |
|
|
|
# Run the install target update? |
|
my ($last_mit, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target"}); |
|
my $time_since_last_mit = $last_mit =~ /^\d+$/ ? time - $last_mit : 100000; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
's1:last_mit' => $last_mit, |
|
's2:time_since_last_mit' => $time_since_last_mit, |
|
}}); |
|
if ($time_since_last_mit > 86400) |
|
{ |
|
# Update the local install target data. |
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({ |
|
variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target", |
|
variable_value => time, |
|
variable_default => "0", |
|
variable_description => "striker_0304", |
|
variable_section => "jobs", |
|
variable_source_uuid => "NULL", |
|
variable_source_table => "", |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); |
|
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ |
|
file => $THIS_FILE, |
|
line => __LINE__, |
|
job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches, |
|
job_data => "", |
|
job_name => "install-target::refresh", |
|
job_title => "job_0015", |
|
job_description => "job_0017", |
|
job_progress => 0, |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { job_uuid => $job_uuid }}); |
|
} |
|
|
|
# Update the OUI data? |
|
my ($last_parse_oui, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-parse-oui"}); |
|
my $time_since_last_parse_oui = $last_parse_oui =~ /^\d+$/ ? time - $last_parse_oui : 100000; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
's1:last_parse_oui' => $last_parse_oui, |
|
's2:time_since_last_parse_oui' => $time_since_last_parse_oui, |
|
}}); |
|
if ($time_since_last_parse_oui > 86400) |
|
{ |
|
# Yup. |
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({ |
|
variable_name => "jobs::last-ran::striker-parse-oui", |
|
variable_value => time, |
|
variable_default => "0", |
|
variable_description => "striker_0305", |
|
variable_section => "jobs", |
|
variable_source_uuid => "NULL", |
|
variable_source_table => "", |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); |
|
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ |
|
file => $THIS_FILE, |
|
line => __LINE__, |
|
job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches, |
|
job_data => "", |
|
job_name => "oui-data::refresh", |
|
job_title => "job_0064", |
|
job_description => "job_0065", |
|
job_progress => 0, |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); |
|
} |
|
|
|
# Scan the network? This is scheduled at most once per day.
my ($last_network_scan, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-scan-network"});
my $time_since_last_network_scan = $last_network_scan =~ /^\d+$/ ? time - $last_network_scan : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	's1:last_network_scan'            => $last_network_scan, 
	's2:time_since_last_network_scan' => $time_since_last_network_scan, 
}});
### NOTE: Bug fix - this condition previously tested '$time_since_last_parse_oui' (a copy/paste
###       error from the OUI block above), which tied the network scan schedule to the OUI parse
###       timestamp instead of the scan's own 'jobs::last-ran::striker-scan-network' variable.
if ($time_since_last_network_scan > 86400)
{
	# Record when we queued this scan so it won't be scheduled again for another day.
	my $variable_uuid = $anvil->Database->insert_or_update_variables({
		variable_name         => "jobs::last-ran::striker-scan-network", 
		variable_value        => time, 
		variable_default      => "0", 
		variable_description  => "striker_0306", 
		variable_section      => "jobs", 
		variable_source_uuid  => "NULL", 
		variable_source_table => "", 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
	
	# Register the background job; it will be picked up by the job runner.
	my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
		file            => $THIS_FILE, 
		line            => __LINE__, 
		job_command     => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches, 
		job_data        => "", 
		job_name        => "scan-network::refresh", 
		job_title       => "job_0066", 
		job_description => "job_0067", 
		job_progress    => 0, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
}
|
} |
|
|
|
# Update the next check time. |
|
$anvil->data->{timing}{next_daily_check} = $now_time + $anvil->data->{timing}{daily_checks}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
"s1:timing::daily_checks" => $anvil->data->{timing}{daily_checks}, |
|
"s2:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check}, |
|
}}); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
### NOTE: This logic plays out in a slightly different way in Database->shutdown().
# Check for stale 'db_in_use' states. Programs register a state named
# 'db_in_use::<db_uuid>::<pid>[::<caller>]' while they hold a database connection. If the
# registering PID is no longer alive on this host, the row is stale (the program exited without
# cleaning up) and is deleted here. Only rows belonging to this host are examined.
sub check_db_in_use_states
{
	my ($anvil) = @_;
	
	# We only reap db_in_use entries for us. Load the current process table first so we can
	# check PID liveness via 'pids::<pid>' below.
	$anvil->System->pids({debug => 2});
	my $query = "
SELECT 
    state_uuid, 
    state_name, 
    state_note 
FROM 
    states 
WHERE 
    state_name LIKE 'db_in_use::%' 
AND 
    state_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $results, 
		count   => $count, 
	}});
	if ($count)
	{
		foreach my $row (@{$results})
		{
			my $state_uuid = $row->[0];
			my $state_name = $row->[1];
			my $state_note = $row->[2];
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:state_uuid' => $state_uuid, 
				's2:state_name' => $state_name, 
				's3:state_note' => $state_note, 
			}});
			
			# Split the state name into the DB host UUID and the '<pid>[::<caller>]' tail.
			my $caller = "";
			my ($db_uuid, $state_pid) = ($state_name =~ /db_in_use::(.*?)::(.*)$/);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:db_uuid'   => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")", 
				's2:state_pid' => $state_pid, 
			}});
			# Newer entries append '::<caller>' after the PID; older ones may not, so only
			# split the caller out when the pattern matches.
			if ($state_pid =~ /(\d+)::(.*)$/)
			{
				$state_pid = $1;
				$caller    = $2;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					's1:state_pid' => $state_pid, 
					's2:caller'    => $caller, 
				}});
			}
			
			# If the recorded PID is not in the process table gathered above, the owner is
			# gone; delete the stale row.
			if (not exists $anvil->data->{pids}{$state_pid})
			{
				# Reap the 'db_is_use'.
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { state_name => $state_name }});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0140", variables => { 
					db       => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")", 
					pid      => $state_pid, 
					'caller' => $caller, 
				}});
				
				my $query = "DELETE FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";";
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
				$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__});
			}
		}
	}
	
	return(0);
}
|
|
|
# This checks to see if any files in /mnt/shared need to be dealt with, like incorporating files in
# /mnt/shared/incoming, etc. All of the actual file handling is delegated to the external
# 'anvil-manage-files --check' program; this sub just invokes it and logs the result.
sub check_incoming
{
	my ($anvil) = @_;
	
	my $command = $anvil->data->{path}{exe}{'anvil-manage-files'}." --check";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $command }});
	
	my ($output, $return_code) = $anvil->System->call({
		shell_call => $command, 
		source     => $THIS_FILE, 
		line       => __LINE__, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output, 
		return_code => $return_code, 
	}});
	
	return(0);
}
|
|
|
# This calls striker-manage-install-target to see if the dhcpd is running or not. Whatever the
# answer, the config variable 'install-target::enabled' is set/updated for this host. On
# non-Striker hosts, this simply returns without doing anything.
sub check_install_target
{
	my ($anvil) = @_;
	
	# The install target only exists on Striker dashboards.
	my $system_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { system_type => $system_type }});
	if ($system_type ne "striker")
	{
		# Not a dashboard, nothing to do.
		return(0);
	}
	
	# Ask the manager for the current status; 'status=0' means disabled, 'status=1' enabled.
	my $status = "unavailable";
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --status --check --no-refresh".$anvil->Log->switches});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output }});
	foreach my $line (split/\n/, $output)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		
		next if $line !~ /status=(\d)/;
		my $digit = $1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { digit => $digit }});
		
		# Any other digit leaves the status as "unavailable".
		$status = "disabled" if $digit == 0;
		$status = "enabled"  if $digit == 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status => $status }});
		last;
	}
	
	# Record the status
	$anvil->Database->insert_or_update_variables({
		variable_name         => "install-target::enabled", 
		variable_source_uuid  => $anvil->Get->host_uuid, 
		variable_source_table => "hosts", 
		variable_value        => $status, 
		variable_default      => "unavailable", 
		variable_description  => "striker_0110", 
		variable_section      => "system", 
	});
	
	return(0);
}
|
|
|
# These are tools that don't need to constantly run. They'll typically run when the server starts up
# or the daemon is restarted or reloaded. The order below matters: the firewall and database must be
# ready before the boot-time tasks (which query the database) run.
sub run_once
{
	my ($anvil) = @_;
	
	# Make sure the firewall is configured.
	$anvil->Network->manage_firewall();
	
	# Check that the database is ready.
	prep_database($anvil);
	
	# Check to see if we need to do boot-time tasks. We only run these if we've just booted
	boot_time_tasks($anvil);
	
	# Check the ssh stuff.
	# NOTE: This actually runs again in the minutes tasks, but needs to run on boot as well.
	$anvil->System->check_ssh_keys();
	
	# Check setuid wrappers
	check_setuid_wrappers($anvil);
	
	# Check journald is configured for persistent storage.
	check_journald($anvil);
	
	# With '--startup-only', the caller wanted just these one-shot tasks; exit cleanly now
	# instead of entering the main loop.
	if ($anvil->data->{switches}{'startup-only'})
	{
		$anvil->nice_exit({exit_code => 0});
	}
	
	return(0);
}
|
|
|
# Ensure systemd-journald keeps logs across reboots. This verifies that journald.conf contains an
# active 'Storage=persistent' line and that the persistent journal directory exists. If the config
# has to be (re)written, systemd-journald is restarted to pick up the change.
# NOTE: The 'peristent_seen' locals were misspelled; renamed to 'persistent_seen' (log labels
#       updated to match - they are informational only).
sub check_journald
{
	my ($anvil) = @_;
	
	# Check the journald.conf to ensure logging in configured to be persistent.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::configs::journald.conf' => $anvil->data->{path}{configs}{'journald.conf'} }});
	my $persistent_seen   = 0;
	my $change_storage    = 0;
	my $old_journald_conf = $anvil->Storage->read_file({file => $anvil->data->{path}{configs}{'journald.conf'}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_journald_conf => $old_journald_conf }});
	foreach my $line (split/\n/, $old_journald_conf)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		if ($line =~ /^Storage=(.*)$/)
		{
			my $value = $1;
			if ($value eq "persistent")
			{
				# Already configured the way we want it.
				$persistent_seen = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { persistent_seen => $persistent_seen }});
			}
			else
			{
				# An active 'Storage=' line with a different value; it will be
				# replaced in the rewrite below.
				$change_storage = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { change_storage => $change_storage }});
			}
		}
	}
	
	# Make sure the journald directory exists (journald only stores persistently when it does).
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::directories::journald' => $anvil->data->{path}{directories}{journald} }});
	if (not -d $anvil->data->{path}{directories}{journald})
	{
		$anvil->Storage->make_directory({
			debug     => 2, 
			directory => $anvil->data->{path}{directories}{journald}, 
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0248", variables => { directory => $anvil->data->{path}{directories}{journald} }});
	}
	
	# Make sure the journald is configured for persistent storage. Rebuild the config, either
	# replacing the active 'Storage=' line, uncommenting the '#Storage=' template line, or
	# appending the setting if neither exists.
	if (not $persistent_seen)
	{
		my $storage_added     = 0;
		my $new_journald_conf = "";
		foreach my $line (split/\n/, $old_journald_conf)
		{
			if (($line =~ /^Storage=/) && ($change_storage))
			{
				# Replace the first active 'Storage=' line; drop any duplicates.
				if (not $storage_added)
				{
					$storage_added     =  1;
					$new_journald_conf .= "Storage=persistent\n";
				}
				next;
			}
			if (($line =~ /^#Storage=/) && (not $storage_added))
			{
				# Insert the active line just above the commented-out template.
				$storage_added     =  1;
				$new_journald_conf .= "Storage=persistent\n";
			}
			$new_journald_conf .= $line."\n";
		}
		if (not $storage_added)
		{
			# No 'Storage=' or '#Storage=' line anywhere; append it.
			$new_journald_conf .= "Storage=persistent\n";
		}
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { new_journald_conf => $new_journald_conf }});
		
		$anvil->Storage->write_file({
			debug     => 3, 
			secure    => 0, 
			file      => $anvil->data->{path}{configs}{'journald.conf'}, 
			body      => $new_journald_conf, 
			mode      => "0644", 
			overwrite => 1, 
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0013", variables => { file => $anvil->data->{path}{configs}{'journald.conf'} }});
		
		# Restart the journald service so the new storage mode takes effect.
		my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart systemd-journald.service";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output, 
			return_code => $return_code, 
		}});
	}
	
	return(0);
}
|
|
|
# This creates, as needed, the setuid wrappers used by apache to make certain system calls. On
# Striker dashboards only, it writes a tiny C wrapper that setuid/setgid's to the 'admin' user and
# exec's 'striker-get-peer-data', compiles it with gcc, and marks the binary setuid (mode 4755).
sub check_setuid_wrappers
{
	my ($anvil) = @_;
	
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		# Not a dashboard, setuid scripts aren't needed.
		return(0);
	}
	
	# Does the call_striker-get-peer-data wrapper exist yet?
	if (-e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
	{
		# Exists, skipping.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0436", variables => { wrapper => $anvil->data->{path}{exe}{'call_striker-get-peer-data'} }});
	}
	else
	{
		# What is the admin user and group ID?
		my $admin_uid = getpwnam('admin');
		my $admin_gid = getgrnam('admin');
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
			admin_uid => $admin_uid, 
			admin_gid => $admin_gid, 
		}});
		### NOTE: Bug fix - these were 'next if ...', but there is no enclosing loop in this
		###       sub, so 'next' would throw "Can't 'next' outside a loop block" at runtime
		###       (or escape the sub with a warning). Without the admin UID/GID the wrapper
		###       can't be built, so simply return.
		return(0) if not $admin_uid;
		return(0) if not $admin_gid;
		
		# Write the body out
		my $call_striker_get_peer_data_body =  "#define REAL_PATH \"".$anvil->data->{path}{exe}{'striker-get-peer-data'}."\"\n";
		   $call_striker_get_peer_data_body .= "main(ac, av)\n";
		   $call_striker_get_peer_data_body .= "char **av;\n";
		   $call_striker_get_peer_data_body .= "{\n";
		   $call_striker_get_peer_data_body .= "	setuid(".$admin_uid.");\n";
		   $call_striker_get_peer_data_body .= "	setgid(".$admin_gid.");\n";
		   $call_striker_get_peer_data_body .= "	execv(REAL_PATH, av);\n";
		   $call_striker_get_peer_data_body .= "}\n";
		my $error = $anvil->Storage->write_file({
			debug     => 3,
			file      => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c", 
			body      => $call_striker_get_peer_data_body,
			mode      => '644',
			overwrite => 1,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { error => $error }});
		
		# If it wrote out, compile it.
		if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c")
		{
			# Failed to write.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0071", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
		}
		else
		{
			# Compile it
			my ($output, $return_code) = $anvil->System->call({
				debug      => 3,
				shell_call => $anvil->data->{path}{exe}{gcc}." -o ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}." ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c", 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				output      => $output, 
				return_code => $return_code, 
			}});
			
			# If it compiled, setuid it.
			if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
			{
				# Something went wrong compiling it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0072", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
			}
			else
			{
				# Owned by root with the setuid bit so apache can invoke it.
				$anvil->Storage->change_owner({
					debug => 3,
					path  => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}, 
					user  => 'root', 
					group => 'root',
				});
				$anvil->Storage->change_mode({
					debug => 3,
					path  => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}, 
					mode  => '4755',
				});
			}
		}
	}
	
	return(0);
}
|
|
|
# Configure/update the firewall.
# NOTE: This sub is currently disabled; the unconditional 'return(0)' below short-circuits it
#       before any work is done (firewall setup is handled by Network->manage_firewall() in
#       run_once()). The code after the return is unreachable and retained for reference.
sub check_firewall
{
	my ($anvil) = @_;
	
	return(0);
	
	# --- Unreachable below this line; kept intentionally. ---
	# Don't call this if we're not configured yet.
	my $configured = $anvil->System->check_if_configured({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	
	# Check the firewall needs to be updated.
	if ($configured)
	{
		my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'anvil-manage-firewall'}.$anvil->Log->switches});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code }});
	}
	
	return(0);
}
|
|
|
# This handles tasks that need to run on boot (if any): clearing a satisfied 'reboot::needed' flag
# (and finishing any 'anvil-manage-power' job that requested the reboot), or - when no reboot is
# pending - marking this host "online" and clearing its stop reason. It then updates /etc/hosts,
# runs version-change work-arounds, starts 'anvil_startup' jobs, and (on Strikers) checks apache.
sub boot_time_tasks
{
	my ($anvil) = @_;
	
	# If the uptime is less than ten minutes, clear the reboot flag.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
	
	# Now find out if a reboot is listed as needed and when it was last changed.
	my $reboot_needed       = 0;
	my $changed_seconds_ago = 0;
	my $query = "
SELECT 
    variable_value, 
    (SELECT extract(epoch from now()) - extract(epoch from modified_date)) AS changed_seconds_ago 
FROM 
    variables 
WHERE 
    variable_source_table = 'hosts' 
AND 
    variable_source_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)." 
AND 
    variable_name = 'reboot::needed'
;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $results, 
		count   => $count, 
	}});
	if ($count)
	{
		$reboot_needed       = $results->[0]->[0];
		$changed_seconds_ago = $results->[0]->[1];
		# Truncate the fractional part of the epoch delta.
		$changed_seconds_ago =~ s/^(\d+)\..*$/$1/;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			reboot_needed       => $reboot_needed, 
			changed_seconds_ago => $changed_seconds_ago, 
		}});
	}
	
	### TODO: This shouldn't be needed anymore. anvil-manage-power doesn't set the progress to '50' prior
	###       to reboot anymore.
	# If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was
	# set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to
	# Lisa Seelye (@thedoh) for this idea!
	my $difference = ($changed_seconds_ago - $uptime);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"s1:reboot_needed"       => $reboot_needed, 
		"s2:changed_seconds_ago" => $changed_seconds_ago, 
		"s3:uptime"              => $uptime, 
		"s4:difference"          => $difference, 
	}});
	if ($reboot_needed)
	{
		if ($uptime < $changed_seconds_ago)
		{
			# Clear the reboot request.
			$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
			
			# Check to see if there was a reboot job in progress. If so, finish it off.
			my $job_uuid = $anvil->Job->get_job_uuid({
				debug      => 2,
				program    => "anvil-manage-power", 
				incomplete => 1, 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
			
			if ($job_uuid)
			{
				# Update the percentage to '100' and then clear the old PID.
				my $date_time = $anvil->Get->date_and_time();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { date_time => $date_time }});
				
				$anvil->Job->update_progress({
					progress     => 100, 
					message      => "message_0064,!!date_and_time!".$date_time."!!", 
					job_uuid     => $job_uuid,
					picked_up_by => 0, 
				});
			}
		}
	}
	else
	{
		# Update our status
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0572"});
		
		# Re-record this host as "online", preserving its existing IPMI/key/name/type data.
		$anvil->Database->get_hosts({debug => 2});
		my $host_uuid = $anvil->Get->host_uuid({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_uuid => $host_uuid }});
		$anvil->Database->insert_or_update_hosts({
			debug       => 2, 
			host_ipmi   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_ipmi}, 
			host_key    => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_key}, 
			host_name   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name}, 
			host_type   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}, 
			host_uuid   => $host_uuid, 
			host_status => "online", 
		});
		
		# Make sure our stop reason is cleared.
		my $variable_uuid = $anvil->Database->insert_or_update_variables({
			variable_name         => 'system::stop_reason', 
			variable_value        => '', 
			variable_default      => '', 
			variable_description  => 'striker_0279', 
			variable_section      => 'system', 
			variable_source_uuid  => $host_uuid, 
			variable_source_table => 'hosts', 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
	}
	
	# Make sure /etc/hosts is updated.
	$anvil->System->update_hosts();
	
	# This handles weird bits for things like bug work-arounds.
	handle_special_cases($anvil);
	
	# Now look for jobs that have a job status of 'anvil_startup'
	run_jobs($anvil, 1);
	
	# Check the firewall needs to be updated.
	check_firewall($anvil);
	
	# If we're a striker, check apache
	my $host_type = $anvil->Get->host_type;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type eq "striker")
	{
		$anvil->Striker->check_httpd_conf({debug => 3});
	}
	
	return(0);
}
|
|
|
# This handles weird bits for things like bug work-arounds. The actual work now lives in the
# external 'anvil-version-changes' program; this sub simply invokes it and logs what it said.
sub handle_special_cases
{
	my ($anvil) = @_;
	
	my $shell_call = $anvil->data->{path}{exe}{'anvil-version-changes'}.$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({
		debug      => 3, 
		shell_call => $shell_call, 
		source     => $THIS_FILE, 
		line       => __LINE__,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		states_output => $output, 
		return_code   => $return_code, 
	}});
	
	return(0);
}
|
|
|
# Configure the local database, if needed. On Striker dashboards only: if no database dump file
# exists for any configured database (meaning this host was never set up), run the full PostgreSQL
# configuration. Otherwise, if we have no DB connections, just make sure the local postgresql
# daemon is running.
sub prep_database
{
	my ($anvil) = @_;
	
	# If there's a backup file, we're configured and possibly just off.
	my $prep_database = 1;
	foreach my $uuid (keys %{$anvil->data->{database}})
	{
		my $dump_file =  $anvil->data->{path}{directories}{pgsql}."/anvil_db_dump.".$uuid.".sql";
		   # Collapse any doubled slashes from the path join.
		   $dump_file =~ s/\/\//\//g;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
		if (-e $dump_file)
		{
			# No need to prepare.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0665", variables => { file => $dump_file }});
			$prep_database = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { prep_database => $prep_database }});
		}
	}
	
	# Only run this if we're a dashboard.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type eq "striker")
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			prep_database                => $prep_database, 
			"sys::database::connections" => $anvil->data->{sys}{database}{connections}, 
		}});
		if ($prep_database)
		{
			# Configure postgres directly. (No trailing semicolon needed; this is the
			# last statement in the block.)
			$anvil->Database->configure_pgsql({debug => 2})
#			### NOTE: This failed once, in case / until it happens again, we'll force log level 2 and secure logging.
#			my $shell_call = $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure";
#			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
#			my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__ });
#			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
#				database_output => $database_output, 
#				return_code     => $return_code, 
#			}});
		}
		elsif (not $anvil->data->{sys}{database}{connections})
		{
			# Start the daemon locally, if needed.
			my $running = $anvil->System->check_daemon({daemon => "postgresql"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }});
			if ($running == 2)
			{
				# Not installed, nothing to do.
			}
			elsif (not $running)
			{
				# Start it.
				my $return_code = $anvil->System->start_daemon({daemon => "postgresql"});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
			}
		}
	}
	
	return(0);
}
|
|
|
# These are tools that need to keep running; called on every pass of the main loop. It reaps
# finished background jobs (Proc::Simple handles stored in 'jobs::handles'), writes out the state
# JSON (when configured) or the live network state file (when not / mapping), then dispatches any
# pending jobs.
sub keep_running
{
	my ($anvil) = @_;
	
	# Check for jobs that were running and now exited.
	if ((not $anvil->data->{sys}{mapping_network}) && (exists $anvil->data->{processes}))
	{
		### NOTE: Bug fix - this loop previously iterated '%{...}' without 'keys', which in
		###       list context yields interleaved keys AND values. Every second "job_uuid"
		###       was actually a Proc::Simple handle, so the handle lookup returned undef
		###       and the ->poll() call would die.
		foreach my $job_uuid (keys %{$anvil->data->{jobs}{handles}})
		{
			# If it's not a handle, delete it.
			my $running = $anvil->data->{jobs}{handles}{$job_uuid}->poll();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, 
				running                      => $running, 
			}});
			
			# If it's not running, update the table to clear the 'job_picked_up_by' column.
			if (not $running)
			{
				my $exit_status = $anvil->data->{jobs}{handles}{$job_uuid}->exit_status();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 
					job_uuid    => $job_uuid, 
					exit_status => $exit_status, 
				}});
				
				# Free up memory
				$anvil->data->{jobs}{handles}{$job_uuid}->cleanup();
				
				$anvil->Job->clear({job_uuid => $job_uuid});
			}
		}
	}
	
	# If we're configured, write out the status JSON file. If we're not configured, Update hardware state files.
	my $configured = $anvil->System->check_if_configured;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	if ((not $anvil->data->{sys}{mapping_network}) && ($configured))
	{
		# Write out state information for all known Anvil! systems and the information from
		# unconfigured nods and DR hosts, using just database data (hence, fast enough to run
		# constantly).
		$anvil->System->generate_state_json({debug => 2});
	}
	else
	{
		# Run this to monitor the network in real time.
		update_state_file($anvil);
	}
	
	# Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
	run_jobs($anvil, 0) if not $anvil->data->{sys}{mapping_network};
	
	return(0);
}
|
|
|
# This will check for any jobs that aren't at 100%. For each found, if 'picked_up_by' is set, a check is made |
|
# to see if the PID is still alive. If it isn't, or if 'picked_up_by' is not set, the appropriate tool is |
|
# invoked to handle it. |
|
sub run_jobs |
|
{ |
|
my ($anvil, $startup) = @_; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup => $startup }}); |
|
|
|
# Don't start jobs for 30 seconds after startup. |
|
if (not $startup) |
|
{ |
|
my $time_since_start = time - $start_time; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
time_since_start => $time_since_start, |
|
start_time => $start_time, |
|
}}); |
|
if ($time_since_start < 60) |
|
{ |
|
# Log that we'll start jobs in X seconds. |
|
my $will_start_in = 60 - $time_since_start; |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "message_0326", variables => { will_start_in => $will_start_in }}); |
|
return(0); |
|
} |
|
} |
|
|
|
# This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's |
|
# changed on disk. |
|
$anvil->data->{sys}{jobs_running} = 0; |
|
|
|
# If we're not configured, we won't hold on starting jobs |
|
my $configured = $anvil->System->check_if_configured; |
|
|
|
# We'll also update the jobs.json file. |
|
my $jobs_file = "{\"jobs\":[\n"; |
|
|
|
# Get a list of pending or incomplete jobs. |
|
my $ended_within = $startup ? 1 : 300; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { ended_within => $ended_within }}); |
|
|
|
$anvil->Database->get_jobs({ |
|
debug => 2, |
|
ended_within => $ended_within, |
|
}); |
|
foreach my $modified_date (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}}) |
|
{ |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { modified_date => $modified_date }}); |
|
foreach my $job_uuid (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}}) |
|
{ |
|
# Reload the jobs so we get an updated view of them. |
|
$anvil->Database->get_jobs({ |
|
debug => 2, |
|
ended_within => $ended_within, |
|
}); |
|
|
|
# Collect the data. |
|
my $job_command = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_command}; |
|
my $short_command = $job_command; |
|
$short_command =~ s/\s.*$//; |
|
my $job_data = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_data}; |
|
my $job_picked_up_by = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_picked_up_by}; |
|
my $job_picked_up_at = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_picked_up_at}; |
|
my $job_updated = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_updated}; |
|
my $job_name = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_name}; |
|
my $job_progress = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_progress}; |
|
my $job_title = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_title}; |
|
my $job_description = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_description}; |
|
my $job_status = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_status}; |
|
my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0; |
|
my $updated_seconds_ago = $job_updated ? (time - $job_updated) : 0; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
's01:job_uuid' => $job_uuid, |
|
's02:job_command' => $job_command, |
|
's03:short_command' => $short_command, |
|
's04:job_data' => $job_data, |
|
's05:job_picked_up_by' => $job_picked_up_by, |
|
's06:job_picked_up_at' => $job_picked_up_at, |
|
's07:job_updated' => $job_updated, |
|
's08:job_name' => $job_name, |
|
's09:job_progress' => $job_progress, |
|
's10:job_title' => $job_title, |
|
's11:job_description' => $job_description, |
|
's12:job_status' => $job_status, |
|
's13:started_seconds_ago' => $started_seconds_ago, |
|
's14:updated_seconds_ago' => $updated_seconds_ago, |
|
}}); |
|
|
|
# To minimize the chance of race conditions, any given command will be called only |
|
# once at a time. If two jobs of the same command exist, only one will be called. |
|
if ($job_progress != 100) |
|
{ |
|
if (exists $anvil->data->{sys}{started}{$short_command}) |
|
{ |
|
# Skip it. |
|
my $started_job = $anvil->data->{sys}{started}{$short_command}; |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0737", variables => { |
|
started_job => $started_job, |
|
job_uuid => $job_uuid, |
|
command => $short_command, |
|
}}); |
|
next; |
|
} |
|
$anvil->data->{sys}{started}{$short_command} = $job_uuid; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::started::${short_command}" => $anvil->data->{sys}{started}{$short_command} }}); |
|
} |
|
|
|
# If this is a start-up call, only start jobs whose status is 'anvil_startup'. |
|
if (($startup) && ($configured) && ($job_status ne "anvil_startup")) |
|
{ |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => { |
|
job_uuid => $job_uuid, |
|
job_command => $job_command, |
|
}}); |
|
next; |
|
} |
|
|
|
if ($job_progress == 100) |
|
{ |
|
# This is a job that might have just completed, clear the started value. |
|
$anvil->data->{jobs}{$job_uuid}{started} = 0; |
|
$job_picked_up_at = 0; |
|
$job_picked_up_by = 0; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
job_picked_up_at => $job_picked_up_at, |
|
job_picked_up_by => $job_picked_up_by, |
|
"jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started}, |
|
}}); |
|
} |
|
else |
|
{ |
|
$anvil->data->{sys}{jobs_running} = 1; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }}); |
|
} |
|
|
|
# See if the job was picked up by a now-dead instance. |
|
if ($job_picked_up_by) |
|
{ |
|
# Check if the PID is still active. |
|
$anvil->System->pids({ignore_me => 1}); |
|
|
|
### TODO: Add a check to verify the job isn't hung. |
|
# Skip if this job is in progress. |
|
if (not exists $anvil->data->{pids}{$job_picked_up_by}) |
|
{ |
|
# If the job is done, just clear the 'job_picked_up_by' and be done. |
|
if ($job_progress ne "100") |
|
{ |
|
# It's possible that the job updated to 100% and exited after |
|
# we gathered the job data, so we won't restart until we've |
|
# seen it not running and not at 100% after 5 loops. |
|
if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid})) |
|
{ |
|
$anvil->data->{lost_job_count}{$job_uuid} = 0; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); |
|
} |
|
if ($anvil->data->{lost_job_count}{$job_uuid} > 5) |
|
{ |
|
# The previous job is gone, but the job isn't |
|
# finished. Start it again. |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0007", variables => { |
|
command => $job_command, |
|
pid => $job_picked_up_by, |
|
percent => $job_progress, |
|
}}); |
|
|
|
# Clear some variables. |
|
$job_progress = 0; |
|
$job_status = "message_0056"; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
job_progress => $job_progress, |
|
job_status => $job_status, |
|
}}); |
|
|
|
# Clear the job. |
|
$anvil->Job->clear({debug => 2, job_uuid => $job_uuid}); |
|
$anvil->data->{lost_job_count}{$job_uuid} = 0; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); |
|
} |
|
else |
|
{ |
|
$anvil->data->{lost_job_count}{$job_uuid}++; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); |
|
} |
|
} |
|
|
|
# Clear the PID |
|
$job_picked_up_by = 0; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }}); |
|
} |
|
elsif ($job_progress ne "100") |
|
{ |
|
# The job is running. |
|
$anvil->data->{jobs_started}{$short_command} = $job_uuid; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }}); |
|
} |
|
} |
|
|
|
# Convert the double-banged strings into a proper message. |
|
my $say_title = $job_title ? $anvil->Words->parse_banged_string({key_string => $job_title}) : ""; |
|
my $say_description = $job_description ? $anvil->Words->parse_banged_string({key_string => $job_description}) : ""; |
|
my $say_status = $job_status ? $anvil->Words->parse_banged_string({key_string => $job_status}) : ""; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { |
|
job_title => $job_title, |
|
say_description => $say_description, |
|
say_status => $say_status, |
|
}}); |
|
|
|
# Make the status HTML friendly. Strip any embedded HTML then encode the text string. |
|
if ($say_status) |
|
{ |
|
my $html_strip = HTML::Strip->new(); |
|
$say_status = $html_strip->parse($say_status); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }}); |
|
|
|
# Now make the resulting text string HTML friendly |
|
my $text_to_html = HTML::FromText->new({ |
|
urls => 1, |
|
email => 1, |
|
lines => 1, |
|
}); |
|
$say_status = $text_to_html->parse($say_status); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }}); |
|
} |
|
|
|
# Add this to the jobs.json file |
|
my $json_string = to_json ({ |
|
job_uuid => $job_uuid, |
|
job_command => $job_command, |
|
job_data => $job_data, |
|
job_picked_up_at => $job_picked_up_at, |
|
job_updated => $job_updated, |
|
job_name => $job_name, |
|
job_progress => $job_progress, |
|
job_title => $say_title, |
|
job_description => $say_description, |
|
job_status => $say_status, |
|
started_seconds_ago => $started_seconds_ago, |
|
updated_seconds_ago => $updated_seconds_ago, |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { json_string => $json_string }}); |
|
$jobs_file .= $json_string.",\n"; |
|
|
|
# If the job is done, move on. |
|
next if $job_progress == 100; |
|
next if $anvil->data->{switches}{'no-start'}; |
|
|
|
# If 'startup' is set, we only care if 'job_status' is 'anvil_startup' |
|
if ((not $startup) && ($say_status eq "anvil_startup")) |
|
{ |
|
# Skip this, it will run next time anvil-daemon restarts. |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => { |
|
command => $job_command, |
|
job_uuid => $job_uuid, |
|
}}); |
|
next; |
|
} |
|
|
|
# If the job is not running, and we've not started any other of the same command this |
|
# loop, start it. |
|
if (not $job_picked_up_by) |
|
{ |
|
if (exists $anvil->data->{jobs_started}{$short_command}) |
|
{ |
|
# Is the job_uuid associated with this command done? |
|
my $started_job_uuid = $anvil->data->{jobs_started}{$short_command}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { started_job_uuid => $started_job_uuid }}); |
|
|
|
if (exists $anvil->data->{jobs}{running}{$started_job_uuid}) |
|
{ |
|
# If the previously running job and this job have the same |
|
# UUID, it failed and needs to restart. |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
job_uuid => $job_uuid, |
|
started_job_uuid => $started_job_uuid, |
|
"jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress}, |
|
}}); |
|
if ($started_job_uuid eq $job_uuid) |
|
{ |
|
# We're restarting. |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { |
|
command => $job_command, |
|
job_uuid => $job_uuid, |
|
}}); |
|
} |
|
elsif ($anvil->data->{jobs}{running}{$started_job_uuid}{job_progress} != 100) |
|
{ |
|
# Don't start it in this pass. |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { |
|
command => $job_command, |
|
this_job_uuid => $job_uuid, |
|
other_job_uuid => $started_job_uuid, |
|
}}); |
|
next; |
|
} |
|
else |
|
{ |
|
# The previous job is done, delete it. |
|
$anvil->data->{jobs_started}{$short_command} = ""; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
"jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command}, |
|
}}); |
|
} |
|
} |
|
} |
|
|
|
my $command = $job_command." --job-uuid ".$job_uuid; |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }}); |
|
|
|
# Have we started this job recently? |
|
if (exists $anvil->data->{jobs}{$job_uuid}{started}) |
|
{ |
|
my $last_start = time - $anvil->data->{jobs}{$job_uuid}{started}; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { last_start => $last_start }}); |
|
|
|
if ($last_start < 60) |
|
{ |
|
# Skip, Started too recently. |
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0578", variables => { |
|
command => $command, |
|
last_start => $last_start, |
|
}}); |
|
next; |
|
} |
|
} |
|
|
|
# Start the job, appending '--job-uuid' to the command. |
|
($anvil->data->{jobs}{handles}{$job_uuid}, my $return_code) = $anvil->System->call({ |
|
background => 1, |
|
stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout", |
|
stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr", |
|
shell_call => $command, |
|
source => $THIS_FILE, |
|
line => __LINE__, |
|
}); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { |
|
"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, |
|
return_code => $return_code, |
|
}}); |
|
|
|
# Log the PID (the job should update the database). |
|
my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid(); |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }}); |
|
|
|
# Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute. |
|
$anvil->data->{jobs}{$job_uuid}{started} = time; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'jobs::$job_uuid::started' => $anvil->data->{jobs}{$job_uuid}{started} }}); |
|
|
|
# Record that a job with this command has started |
|
$anvil->data->{jobs_started}{$short_command} = $job_uuid; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }}); |
|
} |
|
} |
|
} |
|
|
|
# Close the jobs file. |
|
$jobs_file =~ s/,\n$/\n/ms; |
|
$jobs_file .= "]}\n"; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { jobs_file => $jobs_file }}); |
|
|
|
# Write the JSON file |
|
my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json"; |
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_xml => $output_json }}); |
|
$anvil->Storage->write_file({ |
|
file => $output_json, |
|
body => $jobs_file, |
|
overwrite => 1, |
|
backup => 0, |
|
mode => "0644", |
|
user => "apache", |
|
group => "apache", |
|
}); |
|
|
|
return(0); |
|
} |
|
|
|
# |
|
sub check_files
{
	my ($anvil) = @_;
	
	# Ensure that every configured shared directory exists, creating any that are missing.
	my $shared_directories = $anvil->data->{path}{directories}{shared};
	foreach my $key (sort {$a cmp $b} keys %{$shared_directories})
	{
		my $path = $shared_directories->{$key};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
			target    => $key,
			directory => $path, 
		}});
		
		# Nothing to do if the directory already exists.
		next if -e $path;
		
		# Create the directory, owned by apache so the web UI can work with it.
		my $failed = $anvil->Storage->make_directory({
			directory => $path,
			group     => "apache", 
			user      => "apache",
			mode      => "0775",
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { failed => $failed }});
		if ($failed)
		{
			# The directory could not be created, raise an alert.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0254", variables => { 
				directory => $path,
			}});
		}
		else
		{
			# The directory was created successfully.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0255", variables => { 
				directory => $path,
			}});
		}
	}
	
	# Look for files on our system that are in file_locations. If they're shown as ready, make sure 
	# they're there. If they're marked as not ready, see if they now are.
	$anvil->Storage->check_files({debug => 2});
	
	return(0);
}
|
|
|
# This calls 'anvil-update-states', which scans the local machine's state (hardware and software) and 
# records it by writing it out to an HTML file.
|
sub update_state_file
{
	my ($anvil) = @_;
	
	# Note that we're refreshing the state file.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0480"});
	
	# NOTE: Log switches are intentionally not passed along to this call.
	#my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'}.$anvil->Log->switches;
	my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});
	
	# Run the updater and record what it printed along with its exit code.
	my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		states_output => $output, 
		return_code   => $return_code,
	}});
	
	return(0);
}
|
|
|