anvil/tools/anvil-daemon
digimer 8c1c0597da Updated anvil-daemon to run anvil-configure-host in the foreground.
Signed-off-by: digimer <mkelly@alteeve.ca>
2024-05-30 14:49:02 -04:00

2050 lines
82 KiB
Perl
Executable File

#!/usr/bin/perl
#
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster
# nodes and DR hosts.
#
# Exit codes;
# 0 = Normal exit or md5sum of this program changed and it exited to reload.
# 1 = Not running as root.
# 2 = Unable to connect to any database, even after trying to initialize the local system.
#
# TODO:
# - Need to check what kind of machine this is and not prep the database unless its a dashboard.
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs
# - Decide if holding before the main loop until 'systemctl is-system-running' returns 'running' is a good
# idea or not.
# - Write the status of this and the scancore daemon to /etc/anvil/anvil.motd and symlink it to /etc/motd.d/
# - Write a script that runs in crontab at UTC 17:00 that sends an email if Scancore or anvil-daemon are disabled.
# - Examine limites in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU=
# - Write a background program to scan the BCN and uses OUI data to try and find / auto-configure PDUs and UPSes
# -
# - Increase DRBD's default timeout
# - Check for and enable persistent journald logging
#
# NOTE:
# - For later; 'reboot --force --force' immediately kills the OS, like disabling ACPI on EL6 and hitting the
# power button. Might be useful in ScanCore down the road.
#
use strict;
use warnings;
use Anvil::Tools;
use Proc::Simple;
#use Time::HiRes qw ( time sleep );
use JSON;
use HTML::Strip;
use HTML::FromText;
use Data::Dumper;
use Text::Diff;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
# Prevent a discrepency between UID/GID and EUID/EGID from throwing an error.
$< = $>;
$( = $);
# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main lopp. Use the Log methods
# in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();
# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
# Not root
print $anvil->Words->string({key => "error_0005"})."\n";
$anvil->nice_exit({exit_code => 1});
}
# If, so some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();
# If dnf is running, hold.
$anvil->System->wait_on_dnf();
# If we've got bonds, wait for them to be up. Then wait for NetworkManager to be up.
$anvil->Network->wait_on_nm_online({debug => 2});
$anvil->Network->wait_for_network({debug => 2});
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect({
check_if_configured => 1,
check_for_resync => 2,
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
# If I have no databases, sleep for a second and then exit (systemd will restart us).
if (not $anvil->data->{sys}{database}{connections})
{
# If this is a dashboard, try to configure and then connect to the local database. If this isn't a
# dashboard, then just go into a loop waiting for a database to be configured.
if ($anvil->Get->host_type eq "striker")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0201"});
prep_database($anvil);
# Try connecting again
$anvil->Database->connect({check_if_configured => 1, check_for_resync => 2});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# Still nothing, sleep and exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
$anvil->nice_exit({exit_code => 2});
}
}
else
{
# Striker can't initialize us unless it can ssh into us, so make sure root login is enabled.
chech_sshd($anvil);
# Wait until we have one.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0075"});
until($anvil->data->{sys}{database}{connections})
{
sleep 10;
check_network($anvil);
$anvil->refresh();
$anvil->Database->connect({check_if_configured => 1, check_for_resync => 2});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0439"});
}
}
}
}
# Read switches
$anvil->Get->switches({list => [
"clear-mapping",
"refresh-json",
"run-once",
"main-loop-only",
"no-start",
"startup-only"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
if ($anvil->data->{switches}{'refresh-json'})
{
$anvil->data->{switches}{'run-once'} = 1;
$anvil->data->{switches}{'main-loop-only'} = 1;
$anvil->data->{switches}{'no-start'} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"switches::run-once" => $anvil->data->{switches}{'run-once'},
"switches::main-loop-only" => $anvil->data->{switches}{'main-loop-only'},
"switches::no-start" => $anvil->data->{switches}{'no-start'},
}});
}
# This is used to track initial checkes / repairs of network issues.
$anvil->data->{sys}{network}{initial_checks} = 0;
# We use this to delay starting jobs for a short time.
our $start_time = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }});
# There are some things we only want to run on (re)start and don't need to always run.
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};
# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;
# What time is it, Mr. Fox?
my $now_time = time;
# To avoid multiple dashboards running a network scan and OUI parse, the dashboard peer with the lowest
# host_uuid sets it's daily checks to run now, and the other(s) will get a two hour's delay.
my $delay = set_delay($anvil);
# Once a minute, we'll check the md5sums and see if we should restart.
# Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
$anvil->data->{timing}{minute_checks} = 60;
$anvil->data->{timing}{ten_minute_checks} = 600;
$anvil->data->{timing}{hourly_checks} = 3600;
$anvil->data->{timing}{daily_checks} = 86400;
$anvil->data->{timing}{repo_update_interval} = 86400;
$anvil->data->{timing}{next_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_hourly_check} = $now_time - 1;
$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s01:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
"s02:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks},
"s03:timing::hourly_checks" => $anvil->data->{timing}{hourly_checks},
"s04:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
"s05:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval},
"s06:now_time" => $now_time,
"s07:delay" => $delay,
"s08:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
"s09:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
"s10:timing::next_hourly_check" => $anvil->data->{timing}{next_hourly_check},
"s11:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
}});
# Disconnect. We'll reconnect inside the loop
$anvil->Database->disconnect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0203"});
# This will prevent restarting while jobs are running.
$anvil->data->{sys}{jobs_running} = 0;
# When we periodically check if system files have changed, we'll also ask Database>connect() to check if it
# needs to be configured or updated. This is done periodically as it is expensive to run on every loop.
my $check_if_database_is_configured = 0;
# These are the things we always want running.
while(1)
{
# Reload defaults, re-read the config and then connect to the database(s)
$anvil->refresh();
# If, for some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();
$anvil->Database->connect({check_if_configured => $check_if_database_is_configured, check_for_resync => 2});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
# Mark that we don't want to check the database now.
$check_if_database_is_configured = 0;
if ($anvil->data->{sys}{database}{connections})
{
# Run the normal tasks
keep_running($anvil);
# Handle periodic tasks
handle_periodic_tasks($anvil);
}
# Exit if 'run-once' selected.
if ($anvil->data->{switches}{'run-once'})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0055"});
$anvil->nice_exit({exit_code => 0});
}
# Disconnect from the database(s) and sleep now.
$anvil->Database->disconnect();
sleep(2);
}
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# If we're using too much ram, send an alert and exit.
sub check_ram
{
my ($anvil) = @_;
# Problem 0 == ok, 1 == too much ram used, 2 == no pid found
my ($problem, $ram_used) = $anvil->System->check_ram_use({program => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
problem => $problem,
ram_used => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
}});
if ($problem)
{
# See if any jobs are running, and if so, hold because those jobs might be doing things (like
# OS updates or file syncs) that could make anvil-daemon appear to be using more memory.
$anvil->Database->get_jobs({debug => 2});
foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}})
{
my $job_command = $anvil->data->{jobs}{running}{$job_uuid}{job_command};
my $job_progress = $anvil->data->{jobs}{running}{$job_uuid}{job_progress};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_command => $job_command,
job_progress => $job_progress,
}});
if (($job_progress != 100) && ($job_progress != 0))
{
# Don't abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0139", variables => {
job_command => $job_command,
job_progress => $job_progress,
ram_used => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
}});
return(0);
}
}
# Send an alert and exit.
$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
program => $THIS_FILE,
ram_used => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
}, set_by => $THIS_FILE, sort_position => 0});
$anvil->Email->send_alerts();
# Log the same
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0357", variables => {
program => $THIS_FILE,
ram_used => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
}});
# Exit with RC0 so that systemctl restarts
$anvil->nice_exit({exit_code => 0});
}
return(0);
}
# This decides if the local system will delay daily runs on start-up.
sub set_delay
{
my ($anvil) = @_;
my $delay = 7200;
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if ($host_type eq "striker")
{
foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sys::host_uuid" => $anvil->data->{sys}{host_uuid},
uuid => $uuid,
}});
if ($uuid eq $anvil->data->{sys}{host_uuid})
{
$delay = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { delay => $delay }});
}
last;
}
}
else
{
# Not a dashboard, don't delay
$delay = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
}
return($delay);
}
# This checks to see if it's time to see if the network is ok and, if the system has been up long enough,
# checks and tries to repair network issues.
sub check_network
{
my ($anvil) = @_;
### TODO: Remove this when EL8 support is dropped. This was an issue with the old ifcfg configured bonds
# The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such,
# we'll start watching the network after the uptime is 2 minutes.
my $uptime = $anvil->Get->uptime;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
if ($uptime > 120)
{
# Check that bonds are up. Degraded bonds will be left alone.
if (not $anvil->data->{sys}{network}{initial_checks})
{
my $running = $anvil->System->check_daemon({daemon => "NetworkManager"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
if (not $running)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }});
my $return_code = $anvil->System->start_daemon({daemon => "NetworkManager"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
}
$anvil->data->{sys}{network}{initial_checks} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks},
}});
}
check_firewall($anvil);
}
# Check that all users can ping.
if (1)
{
my $shell_call = $anvil->data->{path}{exe}{sysctl}." net.ipv4.ping_group_range";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output }});
if ($output =~ /net.ipv4.ping_group_range = (\d+)\t(\d+)$/)
{
my $lowest_uid = $1;
my $highest_uid = $2;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
lowest_uid => $lowest_uid,
highest_uid => $highest_uid,
}});
if ($highest_uid < 2000)
{
# Tell the user we're enabling ping for all users.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0683"});
my $shell_call = $anvil->data->{path}{exe}{sysctl}." -w net.ipv4.ping_group_range=\"0 2147483647\"";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { output => $output }});
}
}
}
# Check that there's at least one entry in 'network_interfaces' and, if not, call scan-network.
if (1)
{
my $query = "SELECT COUNT(*) FROM network_interfaces WHERE network_interface_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid).";";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
my $count = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { count => $count }});
if (not $count)
{
# Run scan-network
my $shell_call = $anvil->data->{path}{directories}{scan_agents}."/scan-network/scan-network".$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
}
return(0);
}
# This handles running tasks that only run on some loops.
sub handle_periodic_tasks
{
my ($anvil) = @_;
my $now_time = time;
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:now_time" => $now_time,
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
"s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
"s4:timing::next_hourly_check" => $anvil->data->{timing}{next_hourly_check},
"s5:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
"s6:host_type" => $host_type,
}});
# Time to run once per minute tasks.
if ($now_time >= $anvil->data->{timing}{next_minute_check})
{
# Check the firewall needs to be updated.
check_network($anvil);
# Check to see if the PXE environment needs to be updated.
check_install_target($anvil);
# Check that the users we care about have ssh public keys and they're recorded in ssh_keys.
$anvil->System->check_ssh_keys({debug => 2});
$anvil->System->update_hosts({debug => 2});
# Check if the files on disk have changed. Even if it is time to check, don't if a job is
# running.
if ((not $anvil->data->{timing}{jobs_running}) && ($anvil->Storage->check_md5sums))
{
# NOTE: We exit with '0' to prevent systemctl from showing a scary red message.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"});
$anvil->nice_exit({exit_code => 0});
}
# Mark that we want to check the database config next time.
$check_if_database_is_configured = 1;
# Update the next check time.
$anvil->data->{timing}{next_minute_check} = $now_time + $anvil->data->{timing}{minute_checks};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
}});
# Even when this runs, it should finish in under ten seconds so we don't need to background it.
my $shell_call = $anvil->data->{path}{exe}{'anvil-parse-fence-agents'}.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($parse_output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
parse_output => $parse_output,
return_code => $return_code,
}});
# Check shared files.
check_files($anvil);
# Check mail server config.
my $problem = $anvil->Email->check_config({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
# Check if anything is needed to be done in /mnt/shared.
check_incoming($anvil);
# Check for stale db_in_use states.
check_db_in_use_states($anvil);
# Do Striker-specific minute tasks
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if ($host_type eq "striker")
{
# Look for duplicates if we're the primary DB.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sys::database::primary_db" => $anvil->data->{sys}{database}{primary_db},
"Get->host_uuid" => $anvil->Get->host_uuid,
}});
if ($anvil->Get->host_uuid eq $anvil->data->{sys}{database}{primary_db})
{
$anvil->Database->_check_for_duplicates({debug => 2});
}
# Something is causing broken manifests to be created. Until found, this removes them.
check_for_broken_manifests($anvil);
# This can take a while, but it's been optimized to minimize how long it takes to
# run. To be safe, we'll still background it.
my $shell_call = $anvil->data->{path}{exe}{'striker-get-screenshots'}.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({
background => 1,
shell_call => $shell_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
}
# Now check to see if it's time to run less frequent tasks.
if ($now_time >= $anvil->data->{timing}{next_ten_minute_check})
{
my $host_type = $anvil->Get->host_type();
my $host_uuid = $anvil->Get->host_uuid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
host_type => $host_type,
host_uuid => $host_uuid,
}});
# Are we a Striker and is there two or more connections? If so, evaluate if we should shut
# down our database.
if ($host_type eq "striker")
{
# If we're the active database, dump our database out and rsync it to our peers.
my $peers = keys %{$anvil->data->{database}};
my $connections = $anvil->data->{sys}{database}{connections};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peers => $peers,
connections => $connections,
}});
if (exists $anvil->data->{cache}{database_handle}{$host_uuid})
{
# Verify that the database is up.
my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
if ($running)
{
# Backup our DB.
my $dump_file = $anvil->Database->backup_database({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
# Now rsync it to our peer(s)
foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
{
next if $this_host_uuid eq $host_uuid;
my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/";
my $password = $anvil->data->{database}{$this_host_uuid}{password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
this_host_uuid => $this_host_uuid,
destination => $destination,
password => $anvil->Log->is_secure($password),
}});
my $start_time = time;
my $failed = $anvil->Storage->rsync({
debug => 3,
destination => $destination,
password => $password,
source => $dump_file,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
my $rsync_time = time - $start_time;
my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => {
file => $dump_file,
host_name => $target_name,
took => $rsync_time,
size => $size,
size_bytes => $size_bytes,
}});
}
}
}
}
# Reap old db_in_use states over 6 hours old.
my $query = "DELETE FROM states WHERE state_name LIKE 'db_in_use%' AND modified_date < (SELECT now() - interval '6 hour');\n";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__});
# Update the next check time.
$anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks},
"s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
}});
}
# Now check to see if it's time to run hourly tasks.
if ($now_time >= $anvil->data->{timing}{next_hourly_check})
{
# Check how much RAM we're using.
check_ram($anvil);
# Update the next check time.
$anvil->data->{timing}{next_hourly_check} = $now_time + $anvil->data->{timing}{hourly_checks};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:timing::hourly_checks" => $anvil->data->{timing}{hourly_checks},
"s2:timing::next_hourly_check" => $anvil->data->{timing}{next_hourly_check},
}});
}
# Now check to see if it's time to run daily tasks.
if ($now_time >= $anvil->data->{timing}{next_daily_check})
{
# Make sure ksm, ksmtuned and tuned are disabled.
foreach my $daemon ("ksm.service", "ksmtuned.service", "tuned.service")
{
my $status = $anvil->System->check_daemon({daemon => $daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
daemon => $daemon,
status => $status,
}});
if ($status eq "1")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0145", variables => { daemon => $daemon }});
$anvil->System->disable_daemon({
now => 1,
daemon => $daemon,
});
}
}
### NOTE: We call it once/day, but this will also trigger on restart of anvil-daemon. As such, we
### don't use '--force' and let striker-manage-install-target skip the repo update if it happened
### recently enough.
if ($host_type eq "striker")
{
### TODO: This is here only to handle the period of time where we disabled postgres
### on boot. This should be removed sometime after 2022-08-01
#$anvil->System->enable_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
# Record a job, don't call it directly. It takes too long to run.
my $host_uuid = $anvil->Get->host_uuid();
my ($last_age_out, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::aged-out"});
my $time_since_last_age_out = $last_age_out =~ /^\d+$/ ? time - $last_age_out : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:host_uuid' => $host_uuid,
's2:last_age_out' => $last_age_out,
's3:time_since_last_age_out' => $time_since_last_age_out,
}});
# Run an age-out?
if ($time_since_last_age_out > 86400)
{
# Age out old data. This takes up to a minute.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "database::".$host_uuid."::aged-out",
variable_value => time,
variable_default => "0",
variable_description => "striker_0302",
variable_section => "database",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
$anvil->Database->_age_out_data();
}
# Run an archive?
my ($last_archive, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::archived"});
my $time_since_last_archive = $last_archive =~ /^\d+$/ ? time - $last_archive : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_archive' => $last_archive,
's2:time_since_last_archive' => $time_since_last_archive,
}});
if ($time_since_last_archive > 86400)
{
# Archive old data
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "database::".$host_uuid."::archived",
variable_value => time,
variable_default => "0",
variable_description => "striker_0303",
variable_section => "database",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
$anvil->Database->archive_database();
}
# Run the install target update?
my ($last_mit, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target"});
my $time_since_last_mit = $last_mit =~ /^\d+$/ ? time - $last_mit : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_mit' => $last_mit,
's2:time_since_last_mit' => $time_since_last_mit,
}});
if ($time_since_last_mit > 86400)
{
# Update the local install target data.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target",
variable_value => time,
variable_default => "0",
variable_description => "striker_0304",
variable_section => "jobs",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches,
job_data => "",
job_name => "install-target::refresh",
job_title => "job_0015",
job_description => "job_0017",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { job_uuid => $job_uuid }});
}
# Update the OUI data?
my ($last_parse_oui, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-parse-oui"});
my $time_since_last_parse_oui = $last_parse_oui =~ /^\d+$/ ? time - $last_parse_oui : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_parse_oui' => $last_parse_oui,
's2:time_since_last_parse_oui' => $time_since_last_parse_oui,
}});
if ($time_since_last_parse_oui > 86400)
{
# Yup.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "jobs::last-ran::striker-parse-oui",
variable_value => time,
variable_default => "0",
variable_description => "striker_0305",
variable_section => "jobs",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches,
job_data => "",
job_name => "oui-data::refresh",
job_title => "job_0064",
job_description => "job_0065",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
}
# Scan the network?
my ($last_network_scan, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-scan-network"});
my $time_since_last_network_scan = $last_network_scan =~ /^\d+$/ ? time - $last_network_scan : 100000;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:last_network_scan' => $last_network_scan,
's2:time_since_last_network_scan' => $time_since_last_network_scan,
}});
if ($time_since_last_parse_oui > 86400)
{
# Yup.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => "jobs::last-ran::striker-scan-network",
variable_value => time,
variable_default => "0",
variable_description => "striker_0306",
variable_section => "jobs",
variable_source_uuid => "NULL",
variable_source_table => "",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
file => $THIS_FILE,
line => __LINE__,
job_command => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches,
job_data => "",
job_name => "scan-network::refresh",
job_title => "job_0066",
job_description => "job_0067",
job_progress => 0,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
}
}
# Update the next check time.
$anvil->data->{timing}{next_daily_check} = $now_time + $anvil->data->{timing}{daily_checks};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
"s2:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
}});
}
return(0);
}
### TODO: Find the source of the problem and fix it properly.
sub check_for_broken_manifests
{
my ($anvil) = @_;
my $query = "
SELECT
manifest_uuid
FROM
manifests
WHERE
manifest_name = '-anvil-'
;";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
results => $results,
count => $count,
}});
if ($count)
{
foreach my $row (@{$results})
{
my $manifest_uuid = $row->[0];
my $queries = [];
push @{$queries}, "DELETE FROM history.manifests WHERE manifest_uuid = ".$anvil->Database->quote($manifest_uuid).";";
push @{$queries}, "DELETE FROM manifests WHERE manifest_uuid = ".$anvil->Database->quote($manifest_uuid).";";
foreach my $query (sort {$a cmp $b} @{$queries})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0124", variables => { query => $query }});
}
$anvil->Database->write({debug => 2, query => $queries, source => $THIS_FILE, line => __LINE__});
}
}
return(0);
}
### NOTE: This logic plays out in a slightly different way in Database->shutdown().
# Check for stale db_in_use states.
sub check_db_in_use_states
{
my ($anvil) = @_;
# We only reap db_in_use entries for us.
$anvil->System->pids({debug => 2});
my $query = "
SELECT
state_uuid,
state_name,
state_note
FROM
states
WHERE
state_name LIKE 'db_in_use::%'
AND
state_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
;";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
results => $results,
count => $count,
}});
if ($count)
{
foreach my $row (@{$results})
{
my $state_uuid = $row->[0];
my $state_name = $row->[1];
my $state_note = $row->[2];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:state_uuid' => $state_uuid,
's2:state_name' => $state_name,
's3:state_note' => $state_note,
}});
my $caller = "";
my ($db_uuid, $state_pid) = ($state_name =~ /db_in_use::(.*?)::(.*)$/);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:db_uuid' => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")",
's2:state_pid' => $state_pid,
}});
if ($state_pid =~ /(\d+)::(.*)$/)
{
$state_pid = $1;
$caller = $2;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:state_pid' => $state_pid,
's2:caller' => $caller,
}});
}
if (not exists $anvil->data->{pids}{$state_pid})
{
# Reap the 'db_is_use'.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { state_name => $state_name }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0140", variables => {
db => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")",
pid => $state_pid,
'caller' => $caller,
}});
my $query = "DELETE FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__});
}
}
}
return(0);
}
# This checks to see if any files in /mnt/shared need to be dealt with, like incorporating files in
# /mnt/shared/incoming, etc.
sub check_incoming
{
my ($anvil) = @_;
my $shell_call = $anvil->data->{path}{exe}{'anvil-manage-files'}." --check".$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({
shell_call => $shell_call,
source => $THIS_FILE,
line => __LINE__,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
return(0);
}
# This calls striker-manage-install-target to see if the dhcpd is running or not. If it is or isn't, the config
# variable 'install-target::enabled' is set/updated. On non-Striker hosts, this simply returns without doing
# anything.
sub check_install_target
{
my ($anvil) = @_;
my $system_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { system_type => $system_type }});
if ($system_type ne "striker")
{
# Not a dashboard, nothing to do.
return(0);
}
my $status = "unavailable";
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --status --check --no-refresh".$anvil->Log->switches});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output }});
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
if ($line =~ /status=(\d)/)
{
my $digit = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { digit => $digit }});
if ($digit == 0)
{
$status = "disabled";
}
elsif ($digit == 1)
{
$status = "enabled";
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status => $status }});
last;
}
}
# Record the status
$anvil->Database->insert_or_update_variables({
variable_name => "install-target::enabled",
variable_source_uuid => $anvil->Get->host_uuid,
variable_source_table => "hosts",
variable_value => $status,
variable_default => "unavailable",
variable_description => "striker_0110",
variable_section => "system",
});
return(0);
}
# These are tools that don't need to constantly run. They'll typically run when the server starts up or the
# daemon is restarted or reloaded.
sub run_once
{
my ($anvil) = @_;
# Make sure the firewall is configured.
$anvil->Network->manage_firewall();
# Check that the database is ready.
prep_database($anvil);
# Check to see if we need to do boot-time tasks. We only run these if we've just booted
boot_time_tasks($anvil);
# Check the ssh stuff.
# NOTE: This actually runs again in the minutes tasks, but needs to run on boot as well.
$anvil->System->check_ssh_keys();
# Check setuid wrappers
check_setuid_wrappers($anvil);
# Check journald is configured for persistent storage.
check_journald($anvil);
# Make sure root can ssh
chech_sshd($anvil);
if ($anvil->data->{switches}{'startup-only'})
{
$anvil->nice_exit({exit_code => 0});
}
return(0);
}
sub chech_sshd
{
my ($anvil) = @_;
# On EL8, the 'sshd_config.d' directory doesn't exist and root is enabled.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'path::directories::sshd_config.d' => $anvil->data->{path}{directories}{'sshd_config.d'} }});
if (not -d $anvil->data->{path}{directories}{'sshd_config.d'})
{
return(0);
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'path::configs::sshd_root_password' => $anvil->data->{path}{configs}{sshd_root_password} }});
if (not -f $anvil->data->{path}{configs}{sshd_root_password})
{
# Write it out
my $body = "# This file was added to enable root login by password, which is needed while
# forming the Anvil! cluster. Once the cluster is formed, passwordless SSH
# should be enabled and you can disable this feature. Please remove during a
# maintanence window or after testing in a lab environment.
PermitRootLogin yes
";
# Update the config
$anvil->Storage->write_file({
debug => 2,
secure => 0,
file => $anvil->data->{path}{configs}{sshd_root_password},
body => $body,
mode => "0644",
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "message_0418", variables => { file => $anvil->data->{path}{configs}{sshd_root_password} }});
# Restart the journald service.
my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart sshd.service";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
return(0);
}
sub check_journald
{
my ($anvil) = @_;
# Check the journald.conf to ensure logging in configured to be persistent.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::configs::journald.conf' => $anvil->data->{path}{configs}{'journald.conf'} }});
my $peristent_seen = 0;
my $change_storage = 0;
my $old_journald_conf = $anvil->Storage->read_file({file => $anvil->data->{path}{configs}{'journald.conf'}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_journald_conf => $old_journald_conf }});
foreach my $line (split/\n/, $old_journald_conf)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
if ($line =~ /^Storage=(.*)$/)
{
my $value = $1;
if ($value eq "persistent")
{
$peristent_seen = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { peristent_seen => $peristent_seen }});
}
else
{
$change_storage = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { change_storage => $change_storage }});
}
}
}
# Make sure the journald directory
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::directories::journald' => $anvil->data->{path}{directories}{journald} }});
if (not -d $anvil->data->{path}{directories}{journald})
{
$anvil->Storage->make_directory({
debug => 2,
directory => $anvil->data->{path}{directories}{journald},
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0248", variables => { directory => $anvil->data->{path}{directories}{journald} }});
}
# Make sure the journald is configured for persistent storage.
if (not $peristent_seen)
{
my $storage_added = 0;
my $new_journald_conf = "";
foreach my $line (split/\n/, $old_journald_conf)
{
if (($line =~ /^Storage=/) && ($change_storage))
{
if (not $storage_added)
{
$storage_added = 1;
$new_journald_conf .= "Storage=persistent\n";
}
next;
}
if (($line =~ /^#Storage=/) && (not $storage_added))
{
$storage_added = 1;
$new_journald_conf .= "Storage=persistent\n";
}
$new_journald_conf .= $line."\n";
}
if (not $storage_added)
{
$new_journald_conf .= "Storage=persistent\n";
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { new_journald_conf => $new_journald_conf }});
$anvil->Storage->write_file({
debug => 3,
secure => 0,
file => $anvil->data->{path}{configs}{'journald.conf'},
body => $new_journald_conf,
mode => "0644",
overwrite => 1,
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0013", variables => { file => $anvil->data->{path}{configs}{'journald.conf'} }});
# Restart the journald service.
my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart systemd-journald.service";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
return(0);
}
# This creates, as needed, the setuid wrappers used by striker-ui-api to make certain system calls.
sub check_setuid_wrappers
{
my ($anvil) = @_;
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
if ($host_type ne "striker")
{
# Not a dashboard, setuid scripts aren't needed.
return(0);
}
# Does the call_striker-get-peer-data wrapper exist yet?
if (-e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
{
# Exists, skipping.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0436", variables => { wrapper => $anvil->data->{path}{exe}{'call_striker-get-peer-data'} }});
}
else
{
# What is the admin user and group ID?
my $admin_uid = getpwnam('admin');
my $admin_gid = getgrnam('admin');
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
admin_uid => $admin_uid,
admin_gid => $admin_gid,
}});
next if not $admin_uid;
next if not $admin_gid;
# Write the body out
my $call_striker_get_peer_data_body = "#define REAL_PATH \"".$anvil->data->{path}{exe}{'striker-get-peer-data'}."\"\n";
$call_striker_get_peer_data_body .= "main(ac, av)\n";
$call_striker_get_peer_data_body .= "char **av;\n";
$call_striker_get_peer_data_body .= "{\n";
$call_striker_get_peer_data_body .= " setuid(".$admin_uid.");\n";
$call_striker_get_peer_data_body .= " setgid(".$admin_gid.");\n";
$call_striker_get_peer_data_body .= " execv(REAL_PATH, av);\n";
$call_striker_get_peer_data_body .= "}\n";
my $error = $anvil->Storage->write_file({
debug => 3,
file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c",
body => $call_striker_get_peer_data_body,
mode => '644',
overwrite => 1,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { error => $error }});
# If it wrote out, compile it.
if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c")
{
# Failed to write.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0071", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
}
else
{
# Compile it
my ($output, $return_code) = $anvil->System->call({
debug => 3,
shell_call => $anvil->data->{path}{exe}{gcc}." -o ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}." ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
output => $output,
return_code => $return_code,
}});
# If it compiled, setuid it.
if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
{
# Something went wrong compiling it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0072", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
}
else
{
$anvil->Storage->change_owner({
debug => 3,
path => $anvil->data->{path}{exe}{'call_striker-get-peer-data'},
user => 'root',
group => 'root',
});
$anvil->Storage->change_mode({
debug => 3,
path => $anvil->data->{path}{exe}{'call_striker-get-peer-data'},
mode => '4755',
});
}
}
}
return(0);
}
# Configure/update the firewall.
sub check_firewall
{
my ($anvil) = @_;
return(0);
# Don't call this if we're not configured yet.
my $configured = $anvil->System->check_if_configured({debug => 3});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
# Check the firewall needs to be updated.
if ($configured)
{
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'anvil-manage-firewall'}.$anvil->Log->switches});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code }});
}
return(0);
}
# This handles tasks that need to run on boot (if any)
sub boot_time_tasks
{
my ($anvil) = @_;
# If the uptime is less than ten minutes, clear the reboot flag.
my $uptime = $anvil->Get->uptime;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
# Now find out if a reboot is listed as needed and when it was last changed.
my $reboot_needed = 0;
my $changed_seconds_ago = 0;
my $query = "
SELECT
variable_value,
(SELECT extract(epoch from now()) - extract(epoch from modified_date)) AS changed_seconds_ago
FROM
variables
WHERE
variable_source_table = 'hosts'
AND
variable_source_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
AND
variable_name = 'reboot::needed'
;";
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
results => $results,
count => $count,
}});
if ($count)
{
$reboot_needed = $results->[0]->[0];
$changed_seconds_ago = $results->[0]->[1];
$changed_seconds_ago =~ s/^(\d+)\..*$/$1/;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
reboot_needed => $reboot_needed,
changed_seconds_ago => $changed_seconds_ago,
}});
}
### TODO: This shouldn't be needed anymore. anvil-manage-power doesn't set the progress to '50' prior
### to reboot anymore.
# If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was
# set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to
# Lisa Seelye (@thedoh) for this idea!
my $difference = ($changed_seconds_ago - $uptime);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:reboot_needed" => $reboot_needed,
"s2:changed_seconds_ago" => $changed_seconds_ago,
"s3:uptime" => $uptime,
"s4:difference" => $difference,
}});
if ($reboot_needed)
{
if ($uptime < $changed_seconds_ago)
{
# Clear the reboot request.
$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
# Check to see if there was a reboot job in progress. If so, finish it off.
my $job_uuid = $anvil->Job->get_job_uuid({
debug => 2,
program => "anvil-manage-power",
incomplete => 1,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
if ($job_uuid)
{
# Update the percentage to '100' and then clear the old PID.
my $date_time = $anvil->Get->date_and_time();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { date_time => $date_time }});
$anvil->Job->update_progress({
progress => 100,
message => "message_0064,!!date_and_time!".$date_time."!!",
job_uuid => $job_uuid,
picked_up_by => 0,
});
}
}
}
else
{
# Update our status
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0572"});
$anvil->Database->get_hosts({debug => 2});
my $host_uuid = $anvil->Get->host_uuid({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_uuid => $host_uuid }});
# Now update that we're online.
$anvil->Database->insert_or_update_hosts({
debug => 2,
host_ipmi => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_ipmi},
host_key => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_key},
host_name => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name},
host_type => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type},
host_uuid => $host_uuid,
host_status => "online",
});
# Make sure our stop reason is cleared.
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => 'system::stop_reason',
variable_value => '',
variable_default => '',
variable_description => 'striker_0279',
variable_section => 'system',
variable_source_uuid => $host_uuid,
variable_source_table => 'hosts',
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
}
# Make sure /etc/hosts is updated.
$anvil->System->update_hosts({debug => 2});
# This handles weird bits for things like bug work-arounds.
handle_special_cases($anvil);
# Now look for jobs that have a job status of 'anvil_startup'
run_jobs($anvil, 1);
# Check the firewall needs to be updated.
check_firewall($anvil);
return(0);
}
# This handles weird bits for things like bug work-arounds.
sub handle_special_cases
{
my ($anvil) = @_;
# Thsi is now handled by 'anvil-version-changes'
my $shell_call = $anvil->data->{path}{exe}{'anvil-version-changes'}.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($states_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
states_output => $states_output,
return_code => $return_code,
}});
return(0);
}
# Configure the local database, if needed.
sub prep_database
{
my ($anvil) = @_;
# If there's a backup file, we're configured and possibly just off.
my $prep_database = 1;
foreach my $uuid (keys %{$anvil->data->{database}})
{
my $dump_file = $anvil->data->{path}{directories}{pgsql}."/anvil_db_dump.".$uuid.".sql";
$dump_file =~ s/\/\//\//g;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
if (-e $dump_file)
{
# No need to prepare.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0665", variables => { file => $dump_file }});
$prep_database = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { prep_database => $prep_database }});
}
}
# Only run this if we're a dashboard.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if ($host_type eq "striker")
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
prep_database => $prep_database,
"sys::database::connections" => $anvil->data->{sys}{database}{connections},
}});
if ($prep_database)
{
$anvil->Database->configure_pgsql({debug => 2})
# ### NOTE: This failed once, in case / until it happens again, we'll force log level 2 and secure logging.
# my $shell_call = $anvil->data->{path}{exe}{'striker-prep-database'}$anvil->Log->switches;
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
# my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__ });
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
# database_output => $database_output,
# return_code => $return_code,
# }});
}
elsif (not $anvil->data->{sys}{database}{connections})
{
# Start the daemon locally, if needed.
my $running = $anvil->System->check_daemon({daemon => "postgresql"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }});
if ($running == 2)
{
# Not installed, nothing to do.
}
elsif (not $running)
{
# Start it.
my $return_code = $anvil->System->start_daemon({daemon => "postgresql"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
}
}
}
return(0);
}
# These are tools that need to keep running.
sub keep_running
{
my ($anvil) = @_;
# Check for jobs that were running and now exited.
if (exists $anvil->data->{processes})
{
foreach my $job_uuid (%{$anvil->data->{jobs}{handles}})
{
# If it's not a handle, delete it.
my $running = $anvil->data->{jobs}{handles}{$job_uuid}->poll();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
running => $running,
}});
# If it's not running, update the table to clear the 'job_picked_up_by' column.
if (not $running)
{
my $exit_status = $anvil->data->{jobs}{handles}{$job_uuid}->exit_status();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
job_uuid => $job_uuid,
exit_status => $exit_status,
}});
# Free up memory
$anvil->data->{jobs}{handles}{$job_uuid}->cleanup();
$anvil->Job->clear({job_uuid => $job_uuid});
}
}
}
# If we're configured, write out the status JSON file. If we're not configured, Update hardware state files.
my $configured = $anvil->System->check_if_configured;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
if ($configured)
{
# Write out state information for all known Anvil! systems and the information from
# unconfigured nods and DR hosts, using just database data (hence, fast enough to run
# constantly).
$anvil->System->generate_state_json({debug => 2});
}
# Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
run_jobs($anvil, 0);
return(0);
}
# This will check for any jobs that aren't at 100%. For each found, if 'picked_up_by' is set, a check is made
# to see if the PID is still alive. If it isn't, or if 'picked_up_by' is not set, the appropriate tool is
# invoked to handle it.
sub run_jobs
{
my ($anvil, $startup) = @_;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup => $startup }});
# Don't start jobs for 30 seconds after startup.
if (not $startup)
{
my $time_since_start = time - $start_time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
time_since_start => $time_since_start,
start_time => $start_time,
}});
if ($time_since_start < 30)
{
# Log that we'll start jobs in X seconds.
my $will_start_in = 30 - $time_since_start;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "message_0326", variables => { will_start_in => $will_start_in }});
return(0);
}
}
# This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's
# changed on disk.
$anvil->data->{sys}{jobs_running} = 0;
# We'll also update the jobs.json file.
my $jobs_file = "{\"jobs\":[\n";
# Get a list of pending or incomplete jobs.
my $ended_within = $startup ? 1 : 300;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ended_within => $ended_within }});
$anvil->Database->get_jobs({
debug => 2,
ended_within => $ended_within,
});
foreach my $modified_date (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { modified_date => $modified_date }});
foreach my $job_uuid (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}})
{
# Reload the jobs so we get an updated view of them.
$anvil->Database->get_jobs({
debug => 2,
ended_within => $ended_within,
});
# Collect the data.
my $job_command = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_command};
my $short_command = $job_command;
$short_command =~ s/\s.*$//;
my $job_data = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_data};
my $job_picked_up_by = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_picked_up_by};
my $job_picked_up_at = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_picked_up_at};
my $job_updated = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_updated};
my $job_name = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_name};
my $job_progress = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_progress};
my $job_title = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_title};
my $job_description = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_description};
my $job_status = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_status};
my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
my $updated_seconds_ago = $job_updated ? (time - $job_updated) : 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
's01:job_uuid' => $job_uuid,
's02:job_command' => $job_command,
's03:short_command' => $short_command,
's04:job_data' => $job_data,
's05:job_picked_up_by' => $job_picked_up_by,
's06:job_picked_up_at' => $job_picked_up_at,
's07:job_updated' => $job_updated,
's08:job_name' => $job_name,
's09:job_progress' => $job_progress,
's10:job_title' => $job_title,
's11:job_description' => $job_description,
's12:job_status' => $job_status,
's13:started_seconds_ago' => $started_seconds_ago,
's14:updated_seconds_ago' => $updated_seconds_ago,
}});
# If we're not configured, we will only run the 'anvil-configure-host' job
my $configured = $anvil->System->check_if_configured;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { configured => $configured }});
if ((not $configured) && ($job_command !~ /anvil-configure-host/))
{
next;
}
# To minimize the chance of race conditions, any given command will be called only
# once at a time. If two jobs of the same command exist, only one will be called.
if ($job_progress != 100)
{
if (exists $anvil->data->{sys}{started}{$short_command})
{
# Skip it.
my $started_job = $anvil->data->{sys}{started}{$short_command};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0737", variables => {
started_job => $started_job,
job_uuid => $job_uuid,
command => $short_command,
}});
next;
}
$anvil->data->{sys}{started}{$short_command} = $job_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::started::${short_command}" => $anvil->data->{sys}{started}{$short_command} }});
}
# If this is a start-up call, only start jobs whose status is 'anvil_startup'.
if (($startup) && ($configured) && ($job_status ne "anvil_startup"))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => {
job_uuid => $job_uuid,
job_command => $job_command,
}});
next;
}
if ($job_progress == 100)
{
# This is a job that might have just completed, clear the started value.
$anvil->data->{jobs}{$job_uuid}{started} = 0;
$job_picked_up_at = 0;
$job_picked_up_by = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_picked_up_at => $job_picked_up_at,
job_picked_up_by => $job_picked_up_by,
"jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started},
}});
}
else
{
$anvil->data->{sys}{jobs_running} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }});
}
# See if the job was picked up by a now-dead instance.
if ($job_picked_up_by)
{
# Check if the PID is still active.
$anvil->System->pids({ignore_me => 1});
### TODO: Add a check to verify the job isn't hung.
# Skip if this job is in progress.
if (not exists $anvil->data->{pids}{$job_picked_up_by})
{
# If the job is done, just clear the 'job_picked_up_by' and be done.
if ($job_progress ne "100")
{
# It's possible that the job updated to 100% and exited after
# we gathered the job data, so we won't restart until we've
# seen it not running and not at 100% after 5 loops.
if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid}))
{
$anvil->data->{lost_job_count}{$job_uuid} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
}
if ($anvil->data->{lost_job_count}{$job_uuid} > 5)
{
# The previous job is gone, but the job isn't
# finished. Start it again.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0007", variables => {
command => $job_command,
pid => $job_picked_up_by,
percent => $job_progress,
}});
# Clear some variables.
$job_progress = 0;
$job_status = "message_0056";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_progress => $job_progress,
job_status => $job_status,
}});
# Clear the job.
$anvil->Job->clear({debug => 2, job_uuid => $job_uuid});
$anvil->data->{lost_job_count}{$job_uuid} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
}
else
{
$anvil->data->{lost_job_count}{$job_uuid}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
}
}
# Clear the PID
$job_picked_up_by = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }});
}
elsif ($job_progress ne "100")
{
# The job is running.
$anvil->data->{jobs_started}{$short_command} = $job_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }});
if ($short_command =~ /anvil-configure-host/)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"pids::${job_picked_up_by}::cpu" => $anvil->data->{pids}{$job_picked_up_by}{cpu},
"pids::${job_picked_up_by}::memory" => $anvil->data->{pids}{$job_picked_up_by}{memory},
"pids::${job_picked_up_by}::state_codes" => $anvil->data->{pids}{$job_picked_up_by}{state_codes},
"pids::${job_picked_up_by}::start_time" => $anvil->data->{pids}{$job_picked_up_by}{start_time},
"pids::${job_picked_up_by}::time" => $anvil->data->{pids}{$job_picked_up_by}{'time'},
"pids::${job_picked_up_by}::command" => $anvil->data->{pids}{$job_picked_up_by}{command},
}});
}
}
}
# Convert the double-banged strings into a proper message.
my $say_title = $job_title ? $anvil->Words->parse_banged_string({key_string => $job_title}) : "";
my $say_description = $job_description ? $anvil->Words->parse_banged_string({key_string => $job_description}) : "";
my $say_status = $job_status ? $anvil->Words->parse_banged_string({key_string => $job_status}) : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_title => $job_title,
say_description => $say_description,
say_status => $say_status,
}});
# Make the status HTML friendly. Strip any embedded HTML then encode the text string.
if ($say_status)
{
my $html_strip = HTML::Strip->new();
$say_status = $html_strip->parse($say_status);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
# Now make the resulting text string HTML friendly
my $text_to_html = HTML::FromText->new({
urls => 1,
email => 1,
lines => 1,
});
$say_status = $text_to_html->parse($say_status);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
}
# Add this to the jobs.json file
my $json_string = to_json ({
job_uuid => $job_uuid,
job_command => $job_command,
job_data => $job_data,
job_picked_up_at => $job_picked_up_at,
job_updated => $job_updated,
job_name => $job_name,
job_progress => $job_progress,
job_title => $say_title,
job_description => $say_description,
job_status => $say_status,
started_seconds_ago => $started_seconds_ago,
updated_seconds_ago => $updated_seconds_ago,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { json_string => $json_string }});
$jobs_file .= $json_string.",\n";
# If the job is done, move on.
next if $job_progress == 100;
next if $anvil->data->{switches}{'no-start'};
# If 'startup' is set, we only care if 'job_status' is 'anvil_startup'
if ((not $startup) && ($say_status eq "anvil_startup"))
{
# Skip this, it will run next time anvil-daemon restarts.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => {
command => $job_command,
job_uuid => $job_uuid,
}});
next;
}
# If the job is not running, and we've not started any other of the same command this
# loop, start it.
if (not $job_picked_up_by)
{
if (exists $anvil->data->{jobs_started}{$short_command})
{
# Is the job_uuid associated with this command done?
my $started_job_uuid = $anvil->data->{jobs_started}{$short_command};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { started_job_uuid => $started_job_uuid }});
if (exists $anvil->data->{jobs}{running}{$started_job_uuid})
{
# If the previously running job and this job have the same
# UUID, it failed and needs to restart.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
job_uuid => $job_uuid,
started_job_uuid => $started_job_uuid,
"jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress},
}});
if ($started_job_uuid eq $job_uuid)
{
# We're restarting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => {
command => $job_command,
job_uuid => $job_uuid,
}});
}
elsif ($anvil->data->{jobs}{running}{$started_job_uuid}{job_progress} != 100)
{
# Don't start it in this pass.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => {
command => $job_command,
this_job_uuid => $job_uuid,
other_job_uuid => $started_job_uuid,
}});
next;
}
else
{
# The previous job is done, delete it.
$anvil->data->{jobs_started}{$short_command} = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command},
}});
}
}
}
my $command = $job_command." --job-uuid ".$job_uuid;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }});
# Have we started this job recently?
if (exists $anvil->data->{jobs}{$job_uuid}{started})
{
my $last_start = time - $anvil->data->{jobs}{$job_uuid}{started};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { last_start => $last_start }});
if ($last_start < 60)
{
# Skip, Started too recently.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0578", variables => {
command => $command,
last_start => $last_start,
}});
next;
}
}
### Start the job, appending '--job-uuid' to the command.
# If the job is 'anvil-configure-host', DO NOT background it! We need to call it and wait for it to return.
if ($job_command =~ /anvil-configure-host/)
{
# Log that we're going to run this and wait. Our DB connection is
# likely going to break, so disconnect now.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "warning_0178"});
$anvil->Database->disconnect();
my ($output, $return_code) = $anvil->System->call({
debug => 2,
shell_call => $command,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
# Refresh and reconnect
$anvil->refresh({debug => 2});
$anvil->Database->connect({debug => 2, check_for_resync => 2});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
}
else
{
($anvil->data->{jobs}{handles}{$job_uuid}, my $return_code) = $anvil->System->call({
background => 1,
stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout",
stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr",
shell_call => $command,
source => $THIS_FILE,
line => __LINE__,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
return_code => $return_code,
}});
# Log the PID (the job should update the database).
my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
# Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute.
$anvil->data->{jobs}{$job_uuid}{started} = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started} }});
# Record that a job with this command has started
$anvil->data->{jobs_started}{$short_command} = $job_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }});
}
}
}
}
# Close the jobs file.
$jobs_file =~ s/,\n$/\n/ms;
$jobs_file .= "]}\n";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { jobs_file => $jobs_file }});
# Write the JSON file
my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_xml => $output_json }});
$anvil->Storage->write_file({
file => $output_json,
body => $jobs_file,
overwrite => 1,
backup => 0,
mode => "0644",
user => "striker-ui-api",
group => "striker-ui-api",
});
return(0);
}
#
sub check_files
{
my ($anvil) = @_;
# Make sure the shared directories exist.
foreach my $target (sort {$a cmp $b} keys %{$anvil->data->{path}{directories}{shared}})
{
my $directory = $anvil->data->{path}{directories}{shared}{$target};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
target => $target,
directory => $directory,
}});
if (-e $directory)
{
# Make sure the permissions are correct.
$anvil->Storage->get_file_stats({file_path => $directory});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"file_stat::${directory}::unix_mode" => $anvil->data->{file_stat}{$directory}{unix_mode},
}});
if ($anvil->data->{file_stat}{$directory}{unix_mode} !~ /0777$/)
{
$anvil->Storage->change_mode({
debug => 2,
path => $directory,
mode => "0777"
});
}
}
elsif (not -e $anvil->data->{path}{directories}{shared}{$target})
{
my $failed = $anvil->Storage->make_directory({
directory => $directory,
group => "striker-ui-api",
user => "striker-ui-api",
mode => "0777",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { failed => $failed }});
if ($failed)
{
# Something went wrong.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0254", variables => {
directory => $directory,
}});
}
else
{
# Success
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0255", variables => {
directory => $directory,
}});
}
}
}
# Look for files on our system that are in file_locations. If they're shown as ready, make sure
# they're there. If they're marked as not ready, see if they now are.
$anvil->Storage->check_files({debug => 2});
return(0);
}