e60a1b46b3
Signed-off-by: Digimer <digimer@alteeve.ca>
1516 lines
58 KiB
Perl
Executable File
1516 lines
58 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster
|
|
# nodes and DR hosts.
|
|
#
|
|
# Exit codes;
|
|
# 0 = Normal exit or md5sum of this program changed and it exited to reload.
|
|
# 1 = Not running as root.
|
|
# 2 = Unable to connect to any database, even after trying to initialize the local system.
|
|
#
|
|
# TODO:
|
|
# - Need to check what kind of machine this is and not prep the database unless its a dashboard.
|
|
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs
|
|
# - Decide if holding before the main loop until 'systemctl is-system-running' returns 'running' is a good
|
|
# idea or not.
|
|
# - Write the status of this and the scancore daemon to /etc/anvil/anvil.motd and symlink it to /etc/motd.d/
|
|
# - Write a script that runs in crontab at UTC 17:00 that sends an email if Scancore or anvil-daemon are disabled.
|
|
# - Examine limits in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU=
|
|
# - Write a background program to scan the BCN and uses OUI data to try and find / auto-configure PDUs and UPSes
|
|
# -
|
|
# - Increase DRBD's default timeout
|
|
# - Check for and enable persistent journald logging
|
|
#
|
|
# NOTE:
|
|
# - For later; 'reboot --force --force' immediately kills the OS, like disabling ACPI on EL6 and hitting the
|
|
# power button. Might be useful in ScanCore down the road.
|
|
#
|
|
# Switches:
|
|
#
|
|
# --main-loop-only
|
|
#
|
|
# This skips the one-time, start-up tasks and just goes into the main-loop,
|
|
#
|
|
# --no-start
|
|
#
|
|
# This will prevent any pending jobs from being picked up and started in this run. Note that other job checks will still happen.
|
|
#
|
|
# --refresh-json
|
|
#
|
|
# This just updates the JSON files used by the web interface. It is the same as '--run-once --main-loop-only --no-start'
|
|
#
|
|
# --run-once
|
|
#
|
|
# This will tell the program to exit after running the main loop once.
|
|
#
|
|
# --startup-only
|
|
#
|
|
# This will tell the program to exit after running the start up tasks, so the main loop won't run.
|
|
#
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Anvil::Tools;
|
|
use Proc::Simple;
|
|
#use Time::HiRes qw ( time sleep );
|
|
use JSON;
|
|
use HTML::Strip;
|
|
use HTML::FromText;
|
|
use Data::Dumper;
|
|
use Text::Diff;
|
|
|
|
# Strip the directory off of $0 so log entries can reference this program by file name.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# We were invoked via a relative path; expand the leading '.' using the shell's working directory.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

# Prevent a discrepancy between UID/GID and EUID/EGID from throwing an error.
$< = $>;
$( = $);

# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
#       in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();

# Make sure we're running as 'root'.
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root; print the localized error message and exit with code 1 (see "Exit codes" above).
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# If, for some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();

# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect({check_if_configured => 1, check_for_resync => 1});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});

# If I have no databases, try to set one up (Striker) or wait for one to appear (other host types).
if (not $anvil->data->{sys}{database}{connections})
{
	# If this is a dashboard, try to configure and then connect to the local database. If this isn't a
	# dashboard, then just go into a loop waiting for a database to be configured.
	if ($anvil->Get->host_type eq "striker")
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0201"});
		prep_database($anvil);

		# Try connecting again after re-reading the config.
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Still nothing; log the error and exit with code 2 (systemd will restart us).
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
			$anvil->nice_exit({exit_code => 2});
		}
	}
	else
	{
		# Not a Striker; wait here until a database becomes available.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0075"});

		until($anvil->data->{sys}{database}{connections})
		{
			sleep 10;

			# The network may be the reason we can't reach a database, so try to repair it
			# between attempts.
			check_network($anvil);
			$anvil->refresh();
			$anvil->Database->connect({check_if_configured => 1, check_for_resync => 1});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
			if (not $anvil->data->{sys}{database}{connections})
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0439"});
			}
		}
	}
}

# Read switches (defaults set first so unset switches are defined; see the "Switches" header above).
$anvil->data->{switches}{'refresh-json'} = "";
$anvil->data->{switches}{'run-once'} = 0;
$anvil->data->{switches}{'main-loop-only'} = 0;
$anvil->data->{switches}{'no-start'} = 0;
$anvil->data->{switches}{'startup-only'} = 0;
$anvil->Get->switches;

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# '--refresh-json' is shorthand for '--run-once --main-loop-only --no-start'.
if ($anvil->data->{switches}{'refresh-json'})
{
	$anvil->data->{switches}{'run-once'} = 1;
	$anvil->data->{switches}{'main-loop-only'} = 1;
	$anvil->data->{switches}{'no-start'} = 1;
}

# This is used to track initial checks / repairs of network issues.
$anvil->data->{sys}{network}{initial_checks} = 0;

# There are some things we only want to run on (re)start and don't need to always run.
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};

# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;

# What time is it, Mr. Fox?
my $now_time = time;

# To avoid multiple dashboards running a network scan and OUI parse, the dashboard peer with the lowest
# host_uuid sets its daily checks to run now, and the other(s) will get a two hour delay.
my $delay = set_delay($anvil);

# Once a minute, we'll check the md5sums and see if we should restart.
# Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
# The 'next_*' values are set one second in the past so each tier fires on the first loop iteration.
$anvil->data->{timing}{minute_checks} = 60;
$anvil->data->{timing}{ten_minute_checks} = 600;
$anvil->data->{timing}{daily_checks} = 86400;
$anvil->data->{timing}{repo_update_interval} = 86400;
$anvil->data->{timing}{next_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_daily_check} = ($now_time + $delay) - 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
	"s2:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks},
	"s3:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
	"s4:timing::repo_update_interval" => $anvil->data->{timing}{repo_update_interval},
	"s5:now_time" => $now_time,
	"s6:delay" => $delay,
	"s7:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
	"s8:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
	"s9:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
}});

# Disconnect. We'll reconnect inside the loop.
$anvil->Database->disconnect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0203"});

# This will prevent restarting while jobs are running.
$anvil->data->{sys}{jobs_running} = 0;

# When we periodically check if system files have changed, we'll also ask Database->connect() to check if it
# needs to be configured or updated. This is done periodically as it is expensive to run on every loop.
# NOTE: This is a file-scope lexical; handle_periodic_tasks() below closes over it to request the check.
my $check_if_database_is_configured = 0;

# These are the things we always want running.
while(1)
{
	# Reload defaults, re-read the config and then connect to the database(s).
	$anvil->refresh();

	# If, for some reason, anvil.conf is lost, create it.
	$anvil->System->_check_anvil_conf();

	$anvil->Database->connect({check_if_configured => $check_if_database_is_configured, check_for_resync => 1});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});

	# Mark that we don't want to check the database again until the minute-check re-arms this.
	$check_if_database_is_configured = 0;

	# If this host is mapping the network, we'll skip a lot of stuff. If set for over an hour, we'll
	# clear it.
	$anvil->data->{sys}{mapping_network} = check_if_mapping($anvil);

	if ($anvil->data->{sys}{database}{connections})
	{
		# Run the normal tasks.
		keep_running($anvil);

		# Handle periodic tasks (skipped entirely while the network is being mapped).
		handle_periodic_tasks($anvil) if not $anvil->data->{sys}{mapping_network};
	}
	else
	{
		# No databases available. We'll update the state file in case this host is having its
		# network mapped and the interface used to talk to the databases went down. That's all we
		# can do though.
		update_state_file($anvil);
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "log_0202"});
	}

	# Exit if 'run-once' selected.
	if ($anvil->data->{switches}{'run-once'})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0055"});
		$anvil->nice_exit({exit_code => 0});
	}

	# Disconnect from the database(s) and sleep now.
	$anvil->Database->disconnect();
	sleep(2);
}

# Unreachable in practice (the loop above never falls through), kept as a safety net.
$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
|
#############################################################################################################
|
|
# Functions #
|
|
#############################################################################################################
|
|
|
|
# Check to see if we're mapping the network on this host.
|
|
# Check to see if we're mapping the network on this host.
#
# Reads the 'config::map_network' variable (scoped to this host) from the database. If set, and not yet
# expired or explicitly cleared, mark 'sys::mapping_network' and close any cached ssh connections so the
# mapper has free rein over the interfaces. Returns 0 or 1 (the value of 'sys::mapping_network').
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
sub check_if_mapping
{
	my ($anvil) = @_;

	# Default to "not mapping"; without a database connection we have no way to know otherwise.
	$anvil->data->{sys}{mapping_network} = 0;
	if ($anvil->data->{sys}{database}{connections})
	{
		my ($map_network_value, $map_network_uuid, $map_network_mtime, $map_network_modified_date) = $anvil->Database->read_variable({
			debug => 3,
			variable_name => "config::map_network",
			variable_source_table => "hosts",
			variable_source_uuid => $anvil->data->{sys}{host_uuid},
		});
		# We'll run for a day (should be cancelled by the program when the user's done, so this
		# shouldn't fire in practice).
		my $expire_age = 86400;
		my $map_network_age = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			's1:map_network_value' => $map_network_value,
			's2:map_network_mtime' => $map_network_mtime,
			's3:map_network_modified_date' => $map_network_modified_date,
			's4:map_network_uuid' => $map_network_uuid,
		}});
		if ($map_network_uuid)
		{
			# The variable exists; how long ago was it last modified?
			$map_network_age = time - $map_network_mtime;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { map_network_age => $map_network_age }});
		}
		if ($map_network_value)
		{
			# Mapping was requested. Expired, or the user asked us to clear it?
			$anvil->data->{switches}{'clear-mapping'} = "" if not defined $anvil->data->{switches}{'clear-mapping'};
			if (($map_network_age >= $expire_age) or ($anvil->data->{switches}{'clear-mapping'}))
			{
				# Clear it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0470"});
				$anvil->Database->insert_or_update_variables({
					debug => 3,
					variable_value => 0,
					variable_uuid => $map_network_uuid,
					update_value_only => 1,
				});
			}
			else
			{
				# Still active; mark it so the caller only tracks the network this loop.
				my $say_age = $anvil->Convert->add_commas({number => $expire_age});
				my $timeout = $anvil->Convert->add_commas({number => ($expire_age - $map_network_age)});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0471", variables => {
					age => $say_age,
					timeout => $timeout,
				}});

				$anvil->data->{sys}{mapping_network} = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::mapping_network" => $anvil->data->{sys}{mapping_network} }});

				# Close any open ssh connections so mapping can take interfaces down safely.
				foreach my $ssh_fh_key (keys %{$anvil->data->{cache}{ssh_fh}})
				{
					my $ssh_fh = $anvil->data->{cache}{ssh_fh}{$ssh_fh_key};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
						ssh_fh_key => $ssh_fh_key,
						ssh_fh => $ssh_fh,
					}});
					# Only live Net::OpenSSH handles get an explicit disconnect; stale cache
					# entries are simply dropped.
					if ($ssh_fh =~ /^Net::OpenSSH/)
					{
						$ssh_fh->disconnect();
					}
					delete $anvil->data->{cache}{ssh_fh}{$ssh_fh_key};
				}
			}
		}
	}

	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::mapping_network" => $anvil->data->{sys}{mapping_network} }});
	return($anvil->data->{sys}{mapping_network});
}
|
|
|
|
# This decides if the local system will delay daily runs on start-up.
|
|
# This decides if the local system will delay daily runs on start-up.
#
# To avoid multiple dashboards running the network scan and OUI parse at the same time, the Striker peer
# with the LOWEST host_uuid runs its daily checks immediately (delay of 0) and all other Strikers wait two
# hours. Non-Striker hosts never delay.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns the delay, in seconds (0 or 7200).
sub set_delay
{
	my ($anvil) = @_;

	my $delay = 7200;
	my $type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { type => $type }});
	if ($type eq "striker")
	{
		# Sort the configured database host UUIDs so that every peer agrees on which UUID is the
		# "lowest". The previous code iterated the hash keys unsorted and bailed after the first
		# one, which made the winner nondeterministic (Perl hash key order is randomized).
		foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				"sys::host_uuid" => $anvil->data->{sys}{host_uuid},
				uuid => $uuid,
			}});
			if ($uuid eq $anvil->data->{sys}{host_uuid})
			{
				# We're the lowest-UUID peer; no delay.
				$delay = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
			}
			# Only the first (lowest) UUID matters, stop after checking it.
			last;
		}
	}
	else
	{
		# Not a dashboard, don't delay.
		$delay = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
	}

	return($delay);
}
|
|
|
|
# This checks to see if it's time to see if the network is ok and, if the system has been up long enough,
|
|
# checks and tries to repair network issues.
|
|
# This checks to see if it's time to see if the network is ok and, if the system has been up long enough,
# checks and tries to repair network issues. The firewall is also re-checked on each invocation once the
# uptime threshold has passed.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Always returns 0.
sub check_network
{
	my ($anvil) = @_;

	# The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such,
	# nothing is done until the uptime passes two minutes.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
	return(0) if $uptime <= 120;

	if (not $anvil->data->{sys}{network}{initial_checks})
	{
		# First pass since start-up; make sure NetworkManager is alive, starting it if needed.
		my $running = $anvil->System->check_daemon({daemon => "NetworkManager"});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});

		if (not $running)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }});
			my $return_code = $anvil->System->start_daemon({daemon => "NetworkManager"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
		}

		#$anvil->Network->check_network({heal => "all"});

		# Remember that the one-time start-up check has been done.
		$anvil->data->{sys}{network}{initial_checks} = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			"sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks},
		}});
	}
	### NOTE: On subsequent passes, healing was constantly trying to "fix" healthy bonds without a
	###       known way to trigger a debug, so it is disabled for now:
	#$anvil->Network->check_network({heal => "down_only"});

	check_firewall($anvil);

	return(0);
}
|
|
|
|
# This handles running tasks that only run on some loops.
|
|
# This handles running tasks that only run on some loops. Three tiers are evaluated against 'time'; once a
# tier fires, its 'timing::next_*_check' is advanced by the matching 'timing::*_checks' interval.
# - Minute: network/firewall, install target, ssh keys, /etc/hosts, md5sum-restart check, fence agent
#   parse, state file, shared directories, mail config.
# - Ten minutes: Striker database scale-down and database dump/rsync to peers.
# - Daily (Striker only): age-out/archive of DB data and jobs to refresh the install target, OUI data and
#   network scan.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Always returns 0 (may not return at all if the md5sum check triggers an exit).
sub handle_periodic_tasks
{
	my ($anvil) = @_;

	my $now_time = time;
	my $type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		"s1:now_time" => $now_time,
		"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
		"s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
		"s4:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
		"s5:type" => $type,
	}});

	# Time to run once per minute tasks.
	if ($now_time >= $anvil->data->{timing}{next_minute_check})
	{
		# Check if the firewall needs to be updated.
		check_network($anvil);

		# Check to see if the PXE environment needs to be updated.
		check_install_target($anvil);

		# Check that the users we care about have ssh public keys and they're recorded in ssh_keys.
		$anvil->System->check_ssh_keys({debug => 3});

		$anvil->System->update_hosts({debug => 3});

		# Check if the files on disk have changed. Even if it is time to check, don't if a job is
		# running.
		# FIX: This previously read 'timing::jobs_running', a key that is never set anywhere, so the
		#      guard never blocked a restart. The flag actually maintained by this program (set to 0
		#      before the main loop, with the stated purpose of preventing restarts mid-job) is
		#      'sys::jobs_running'.
		if ((not $anvil->data->{sys}{jobs_running}) && ($anvil->Storage->check_md5sums))
		{
			# NOTE: We exit with '0' to prevent systemctl from showing a scary red message.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"});
			$anvil->nice_exit({exit_code => 0});
		}

		# Mark that we want to check the database config next time (file-scope lexical shared with
		# the main loop).
		$check_if_database_is_configured = 1;

		# Update the next check time.
		$anvil->data->{timing}{next_minute_check} = $now_time + $anvil->data->{timing}{minute_checks};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
			"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
		}});

		# Even when this runs, it should finish in under ten seconds so we don't need to background it.
		my ($parse_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{'anvil-parse-fence-agents'}.$anvil->Log->switches, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { parse_output => $parse_output }});

		# Scan the local network.
		update_state_file($anvil);

		# Make sure the shared directories exist, creating any that are missing as apache:apache 0775.
		foreach my $target (sort {$a cmp $b} keys %{$anvil->data->{path}{directories}{shared}})
		{
			my $directory = $anvil->data->{path}{directories}{shared}{$target};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				target => $target,
				directory => $directory,
			}});
			if (not -e $anvil->data->{path}{directories}{shared}{$target})
			{
				my $failed = $anvil->Storage->make_directory({
					directory => $directory,
					group => "apache",
					user => "apache",
					mode => "0775",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { failed => $failed }});
				if ($failed)
				{
					# Something went wrong.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0254", variables => {
						directory => $directory,
					}});
				}
				else
				{
					# Success
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0255", variables => {
						directory => $directory,
					}});
				}
			}
		}

		# Check mail server config.
		my $problem = $anvil->Email->check_config({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
	}

	# Now check to see if it's time to run less frequent tasks.
	if ($now_time >= $anvil->data->{timing}{next_ten_minute_check})
	{
		my $host_type = $anvil->Get->host_type();
		my $host_uuid = $anvil->Get->host_uuid();
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			host_type => $host_type,
			host_uuid => $host_uuid,
		}});

		# Are we a Striker and is there two or more connections? If so, evaluate if we should shut
		# down our database.
		if ($host_type eq "striker")
		{
			if ($anvil->data->{sys}{database}{connections} > 1)
			{
				# Sort by UUID, skip the first, and see if we're one of the others.
				my $first_uuid = "";
				foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}})
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }});
					if (not $first_uuid)
					{
						$first_uuid = $uuid;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }});
					}
					elsif ($uuid eq $host_uuid)
					{
						# This is us, backup and shut down.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"});

						# Switch the read_uuid and then close.
						$anvil->data->{sys}{database}{read_uuid} = $first_uuid;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }});

						# Disconnect our own handle before stopping the local server.
						$anvil->data->{cache}{database_handle}{$uuid}->disconnect;
						delete $anvil->data->{cache}{database_handle}{$uuid};

						# Create a backup, this is useful also for setting the mtime of the last time
						# we were up.
						my $dump_file = $anvil->Database->backup_database({debug => 3});
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});

						# Stop the daemon
						my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
						if ($return_code eq "0")
						{
							# Stopped the daemon.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"});
						}
					}
				}
			}
		}

		# If we're the active database, dump our database out and rsync it to our peers.
		my $peers = keys %{$anvil->data->{database}};
		my $connections = $anvil->data->{sys}{database}{connections};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			peers => $peers,
			connections => $connections,
		}});
		if (exists $anvil->data->{cache}{database_handle}{$host_uuid})
		{
			# Verify that the database is up.
			my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
			if ($running)
			{
				# Backup our DB.
				my $dump_file = $anvil->Database->backup_database({debug => 2});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});

				# Now rsync it to our peer(s).
				foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
				{
					next if $this_host_uuid eq $host_uuid;

					my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/";
					my $password = $anvil->data->{database}{$this_host_uuid}{password};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						this_host_uuid => $this_host_uuid,
						destination => $destination,
						password => $anvil->Log->is_secure($password),
					}});

					my $start_time = time;
					my $failed = $anvil->Storage->rsync({
						debug => 3,
						destination => $destination,
						password => $password,
						source => $dump_file,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});

					# Report how long the copy took and how big the dump was.
					my $rsync_time = time - $start_time;
					my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
					my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
					my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => {
						file => $dump_file,
						host_name => $target_name,
						took => $rsync_time,
						size => $size,
						size_bytes => $size_bytes,
					}});
				}
			}
		}

		# Update the next check time.
		$anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			"s1:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks},
			"s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
		}});
	}

	# Now check to see if it's time to run daily tasks.
	if ($now_time >= $anvil->data->{timing}{next_daily_check})
	{
		### NOTE: We call it once/day, but this will also trigger on restart of anvil-daemon. As such, we
		###       don't use '--force' and let striker-manage-install-target skip the repo update if it
		###       happened recently enough.
		if ($type eq "striker")
		{
			# Age out old data. This takes up to a minute.
			$anvil->Database->_age_out_data();

			# Archive old data.
			$anvil->Database->archive_database();

			# Record a job, don't call it directly. It takes too long to run.
			my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
				file => $THIS_FILE,
				line => __LINE__,
				job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches,
				job_data => "",
				job_name => "install-target::refresh",
				job_title => "job_0015",
				job_description => "job_0017",
				job_progress => 0,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});

			# Update the OUI data.
			($job_uuid) = $anvil->Database->insert_or_update_jobs({
				file => $THIS_FILE,
				line => __LINE__,
				job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches,
				job_data => "",
				job_name => "oui-data::refresh",
				job_title => "job_0064",
				job_description => "job_0065",
				job_progress => 0,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});

			# Scan the networks.
			($job_uuid) = $anvil->Database->insert_or_update_jobs({
				file => $THIS_FILE,
				line => __LINE__,
				job_command => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches,
				job_data => "",
				job_name => "scan-network::refresh",
				job_title => "job_0066",
				job_description => "job_0067",
				job_progress => 0,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
		}

		# Update the next check time.
		$anvil->data->{timing}{next_daily_check} = $now_time + $anvil->data->{timing}{daily_checks};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			"s1:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
			"s2:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
		}});
	}

	return(0);
}
|
|
|
|
# This calls striker-manage-install-target to see if the dhcpd is running or not. If it is or isn't, the config
|
|
# variable 'install-target::enabled' is set/updated. On non-Striker hosts, this simply returns without doing
|
|
# anything.
|
|
# This calls striker-manage-install-target to see if the dhcpd is running or not. Whatever the answer, the
# config variable 'install-target::enabled' is set/updated to "enabled", "disabled" or "unavailable". On
# non-Striker hosts, this simply returns without doing anything.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Always returns 0.
sub check_install_target
{
	my ($anvil) = @_;

	# Only Striker dashboards host an Install Target.
	my $system_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { system_type => $system_type }});
	return(0) if $system_type ne "striker";

	# Map the 'status=X' digit reported by the tool to the recorded state; anything else (including no
	# 'status=' line at all) leaves the status as "unavailable".
	my %say_status = (
		0 => "disabled",
		1 => "enabled",
	);
	my $status = "unavailable";
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --status --check --no-refresh".$anvil->Log->switches});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output }});
	foreach my $line (split/\n/, $output)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});

		next if $line !~ /status=(\d)/;
		my $digit = $1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { digit => $digit }});

		$status = $say_status{$digit} if exists $say_status{$digit};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status => $status }});

		# Only the first 'status=' line matters.
		last;
	}

	# Record the status.
	$anvil->Database->insert_or_update_variables({
		variable_name => "install-target::enabled",
		variable_source_uuid => $anvil->Get->host_uuid,
		variable_source_table => "hosts",
		variable_value => $status,
		variable_default => "unavailable",
		variable_description => "striker_0110",
		variable_section => "system",
	});

	return(0);
}
|
|
|
|
# These are tools that don't need to constantly run. They'll typically run when the server starts up or the
|
|
# daemon is restarted or reloaded.
|
|
# These are tools that don't need to constantly run. They'll typically run when the server starts up or the
# daemon is restarted or reloaded. The order matters: the database is prepared before boot-time tasks, and
# ssh/setuid/journald checks follow. Honors the '--startup-only' switch by exiting once done.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns 0 (or does not return at all when '--startup-only' is set).
sub run_once
{
	my ($anvil) = @_;

	# Check that the database is ready.
	prep_database($anvil);

	# Check to see if we need to do boot-time tasks. We only run these if we've just booted.
	boot_time_tasks($anvil);

	# Check the ssh stuff.
	# NOTE: This actually runs again in the minutes tasks, but needs to run on boot as well.
	$anvil->System->check_ssh_keys();

	# Check setuid wrappers.
	check_setuid_wrappers($anvil);

	# Check journald is configured for persistent storage.
	check_journald($anvil);

	# If the user only wanted the start-up tasks, we're done; exit cleanly.
	if ($anvil->data->{switches}{'startup-only'})
	{
		$anvil->nice_exit({exit_code => 0});
	}

	return(0);
}
|
|
|
|
# This ensures that journald is configured for persistent storage; 'Storage=persistent' is set in
# journald.conf and the on-disk journal directory exists. If the configuration had to be changed,
# systemd-journald is restarted so the new setting takes effect.
# NOTE: Fixed the misspelled '$peristent_seen' variable (which also leaked into the log output as a
#       misspelled key).
sub check_journald
{
	my ($anvil) = @_;
	
	# Read journald.conf and look for an active (uncommented) 'Storage=' entry.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::configs::journald.conf' => $anvil->data->{path}{configs}{'journald.conf'} }});
	my $persistent_seen   = 0;	# Set if we see 'Storage=persistent'.
	my $change_storage    = 0;	# Set if we see 'Storage=<anything else>' that needs replacing.
	my $old_journald_conf = $anvil->Storage->read_file({file => $anvil->data->{path}{configs}{'journald.conf'}});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_journald_conf => $old_journald_conf }});
	foreach my $line (split/\n/, $old_journald_conf)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		if ($line =~ /^Storage=(.*)$/)
		{
			my $value = $1;
			if ($value eq "persistent")
			{
				$persistent_seen = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { persistent_seen => $persistent_seen }});
			}
			else
			{
				$change_storage = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { change_storage => $change_storage }});
			}
		}
	}
	
	# Make sure the journald on-disk directory exists; journald won't store logs persistently without it.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::directories::journald' => $anvil->data->{path}{directories}{journald} }});
	if (not -d $anvil->data->{path}{directories}{journald})
	{
		$anvil->Storage->make_directory({
			debug     => 2,
			directory => $anvil->data->{path}{directories}{journald},
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0248", variables => { directory => $anvil->data->{path}{directories}{journald} }});
	}
	
	# Make sure the journald is configured for persistent storage.
	if (not $persistent_seen)
	{
		my $storage_added     = 0;
		my $new_journald_conf = "";
		foreach my $line (split/\n/, $old_journald_conf)
		{
			# Replace the first active 'Storage=' line, and drop any later duplicates.
			if (($line =~ /^Storage=/) && ($change_storage))
			{
				if (not $storage_added)
				{
					$storage_added     =  1;
					$new_journald_conf .= "Storage=persistent\n";
				}
				next;
			}
			# Otherwise, inject our line just above the commented-out example, keeping the comment.
			if (($line =~ /^#Storage=/) && (not $storage_added))
			{
				$storage_added     =  1;
				$new_journald_conf .= "Storage=persistent\n";
			}
			$new_journald_conf .= $line."\n";
		}
		# No anchor found anywhere in the file? Append the entry to the end.
		if (not $storage_added)
		{
			$new_journald_conf .= "Storage=persistent\n";
		}
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { new_journald_conf => $new_journald_conf }});
		
		$anvil->Storage->write_file({
			debug     => 3,
			secure    => 0,
			file      => $anvil->data->{path}{configs}{'journald.conf'},
			body      => $new_journald_conf,
			mode      => "0644",
			overwrite => 1,
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0013", variables => { file => $anvil->data->{path}{configs}{'journald.conf'} }});
		
		# Restart the journald service so the new storage setting takes effect.
		my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart systemd-journald.service";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});
	}
	
	return(0);
}
|
|
|
|
# This creates, as needed, the setuid wrappers used by apache to make certain system calls.
#
# Fixes in this version;
# - The admin UID/GID sanity checks used 'next', but we are not inside a loop here. Outside a loop,
#   'next' is a runtime error (or, if the caller happened to be looping, it would silently jump out of
#   this subroutine into the caller's loop). They now 'return' instead.
# - The generated C wrapper called setuid() before setgid(). Once setuid() drops root, the following
#   setgid() fails and the process keeps apache's group. Group privileges must be dropped first.
# - The wrapper used K&R-style 'main(ac, av)' with implicit int; modern gcc rejects implicit types, so
#   the wrapper is now ANSI C with the needed include.
sub check_setuid_wrappers
{
	my ($anvil) = @_;
	
	# Only Striker dashboards run apache, so only they need the wrappers.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		# Not a dashboard, setuid scripts aren't needed.
		return(0);
	}
	
	# Does the call_striker-get-peer-data wrapper exist yet?
	if (-e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
	{
		# Exists, skipping.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0436", variables => { wrapper => $anvil->data->{path}{exe}{'call_striker-get-peer-data'} }});
	}
	else
	{
		# What is the admin user and group ID?
		my $admin_uid = getpwnam('admin');
		my $admin_gid = getgrnam('admin');
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			admin_uid => $admin_uid,
			admin_gid => $admin_gid,
		}});
		# If the 'admin' user or group doesn't exist (yet), there's nothing to do.
		return(0) if not $admin_uid;
		return(0) if not $admin_gid;
		
		# Write the body out. Drop group privileges before user privileges; a setgid() call made
		# after setuid() lowers the effective UID would fail.
		my $call_striker_get_peer_data_body =  "#define REAL_PATH \"".$anvil->data->{path}{exe}{'striker-get-peer-data'}."\"\n";
		   $call_striker_get_peer_data_body .= "#include <unistd.h>\n";
		   $call_striker_get_peer_data_body .= "int main(int ac, char **av)\n";
		   $call_striker_get_peer_data_body .= "{\n";
		   $call_striker_get_peer_data_body .= "	setgid(".$admin_gid.");\n";
		   $call_striker_get_peer_data_body .= "	setuid(".$admin_uid.");\n";
		   $call_striker_get_peer_data_body .= "	execv(REAL_PATH, av);\n";
		   $call_striker_get_peer_data_body .= "	return(1);\n";
		   $call_striker_get_peer_data_body .= "}\n";
		my $error = $anvil->Storage->write_file({
			debug     => 3,
			file      => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c",
			body      => $call_striker_get_peer_data_body,
			mode      => '644',
			overwrite => 1,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { error => $error }});
		
		# If it wrote out, compile it.
		if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c")
		{
			# Failed to write.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0071", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
		}
		else
		{
			# Compile it
			my ($output, $return_code) = $anvil->System->call({
				debug      => 3,
				shell_call => $anvil->data->{path}{exe}{gcc}." -o ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}." ".$anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c",
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				output      => $output,
				return_code => $return_code,
			}});
			
			# If it compiled, setuid it.
			if (not -e $anvil->data->{path}{exe}{'call_striker-get-peer-data'})
			{
				# Something went wrong compiling it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0072", variables => { file => $anvil->data->{path}{exe}{'call_striker-get-peer-data'}.".c" }});
			}
			else
			{
				# Owned by root, mode 4755, so apache can run it with root privileges.
				$anvil->Storage->change_owner({
					debug => 3,
					path  => $anvil->data->{path}{exe}{'call_striker-get-peer-data'},
					user  => 'root',
					group => 'root',
				});
				$anvil->Storage->change_mode({
					debug => 3,
					path  => $anvil->data->{path}{exe}{'call_striker-get-peer-data'},
					mode  => '4755',
				});
			}
		}
	}
	
	return(0);
}
|
|
|
|
# Configure/update the firewall.
sub check_firewall
{
	my ($anvil) = @_;
	
	# If this host hasn't been configured yet, leave the firewall alone.
	my $configured = $anvil->System->check_if_configured({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	return(0) if not $configured;
	
	# Hand the actual work off to anvil-manage-firewall.
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'anvil-manage-firewall'}.$anvil->Log->switches});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code }});
	
	return(0);
}
|
|
|
|
# This handles tasks that need to run on boot (if any)
sub boot_time_tasks
{
	my ($anvil) = @_;
	
	# Get the uptime; it's compared against the age of the 'reboot::needed' flag below to decide
	# whether a requested reboot has already happened.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
	
	# Now find out if a reboot is listed as needed and when it was last changed.
	my $reboot_needed       = 0;
	my $changed_seconds_ago = 0;
	my $query = "
SELECT
    variable_value,
    (SELECT extract(epoch from now()) - extract(epoch from modified_date)) AS changed_seconds_ago
FROM
    variables
WHERE
    variable_source_table = 'hosts'
AND
    variable_source_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
AND
    variable_name = 'reboot::needed'
;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = @{$results};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		results => $results,
		count   => $count,
	}});
	if ($count)
	{
		$reboot_needed       = $results->[0]->[0];
		$changed_seconds_ago = $results->[0]->[1];
		# Truncate the fractional seconds from the epoch delta.
		$changed_seconds_ago =~ s/^(\d+)\..*$/$1/;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			reboot_needed       => $reboot_needed,
			changed_seconds_ago => $changed_seconds_ago,
		}});
	}
	
	# If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was
	# set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to
	# Lisa Seelye (@thedoh) for this idea!
	my $difference = ($changed_seconds_ago - $uptime);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"s1:reboot_needed"       => $reboot_needed,
		"s2:changed_seconds_ago" => $changed_seconds_ago,
		"s3:uptime"              => $uptime,
		"s4:difference"          => $difference,
	}});
	if ($reboot_needed)
	{
		if ($uptime < $changed_seconds_ago)
		{
			# We booted after the flag was set, so the reboot happened. Clear the reboot request.
			$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
			
			# Check to see if there was a reboot job in progress. If so, finish it off.
			my $job_uuid = $anvil->Job->get_job_uuid({
				debug      => 2,
				program    => "anvil-manage-power",
				incomplete => 1,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
			
			if ($job_uuid)
			{
				# Update the percentage to '100' and then clear the old PID.
				my $date_time = $anvil->Get->date_and_time();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { date_time => $date_time }});
				
				$anvil->Job->update_progress({
					progress     => 100,
					message      => "message_0064,!!date_and_time!".$date_time."!!",
					job_uuid     => $job_uuid,
					picked_up_by => 0,
				});
			}
		}
	}
	else
	{
		# No reboot pending; update our status to mark this host as online.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0572"});
		
		$anvil->Database->get_hosts({debug => 2});
		my $host_uuid = $anvil->Get->host_uuid({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_uuid => $host_uuid }});
		$anvil->Database->insert_or_update_hosts({
			debug       => 2,
			host_ipmi   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_ipmi},
			host_key    => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_key},
			host_name   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name},
			host_type   => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type},
			host_uuid   => $host_uuid,
			host_status => "online",
		});
		
		# Make sure our stop reason is cleared.
		my $variable_uuid = $anvil->Database->insert_or_update_variables({
			variable_name         => 'system::stop_reason',
			variable_value        => '',
			variable_default      => '',
			variable_description  => 'striker_0279',
			variable_section      => 'system',
			variable_source_uuid  => $host_uuid,
			variable_source_table => 'hosts',
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
	}
	
	# Make sure /etc/hosts is updated.
	$anvil->System->update_hosts();
	
	# This handles weird bits for things like bug work-arounds.
	handle_special_cases($anvil);
	
	# Now look for jobs that have a job status of 'anvil_startup'
	run_jobs($anvil, 1);
	
	# Check the firewall needs to be updated.
	check_firewall($anvil);
	
	# If we're a striker, check apache
	my $host_type = $anvil->Get->host_type;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type eq "striker")
	{
		$anvil->Striker->check_httpd_conf({debug => 2});
	}
	
	return(0);
}
|
|
|
|
# This handles weird bits for things like bug work-arounds.
sub handle_special_cases
{
	my ($anvil) = @_;
	
	# RHBZ #1961562 - https://bugzilla.redhat.com/show_bug.cgi?id=1961562#c16
	# The work-around only applies to machines that host servers (nodes and DR hosts).
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	return(0) if $host_type eq "striker";
	
	# We're a node or DR host. We need to touch this file.
	my $work_around_file = "/etc/qemu/firmware/50-edk2-ovmf-cc.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { work_around_file => $work_around_file }});
	if (not -e $work_around_file)
	{
		# Create the (empty) file; its existence alone is the work-around.
		$anvil->Storage->write_file({
			debug     => 2,
			file      => $work_around_file,
			body      => "",
			overwrite => 0,
			backup    => 0,
			mode      => "0644",
			user      => "root",
			group     => "root",
		});
	}
	
	return(0);
}
|
|
|
|
# Configure the local database, if needed.
sub prep_database
{
	my ($anvil) = @_;
	
	# A dump file on disk means the database was configured at some point; it may just be off, so
	# there's nothing to prepare.
	my $dump_file = $anvil->data->{path}{directories}{pgsql}."/".$anvil->data->{sys}{database}{name}."_db_dump.".$anvil->Get->host_uuid().".out";
	$dump_file =~ s{//}{/}g;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { dump_file => $dump_file }});
	return(0) if -e $dump_file;
	
	# Only Striker dashboards host a database, so skip this everywhere else.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type eq "striker")
	{
		### NOTE: This failed once, in case / until it happens again, we'll force log level 2 and secure logging.
		#my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{'striker-prep-database'}.$anvil->Log->switches, source => $THIS_FILE, line => __LINE__ });
		my ($database_output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure", source => $THIS_FILE, line => __LINE__ });
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			database_output => $database_output,
			return_code     => $return_code,
		}});
	}
	
	return(0);
}
|
|
|
|
# These are tools that need to keep running.
#
# Fixes in this version;
# - The reaper loop iterated '%{...{jobs}{handles}}' in list context, which flattens the hash to
#   alternating keys AND values. Every other "job_uuid" was actually a handle object, and ->poll()
#   was then called via a hash lookup on a stringified object. It now iterates 'keys' only.
# - Reaped handles are now deleted from the hash after cleanup() so a dead handle is never re-polled
#   on a later pass (this matches the original "If it's not a handle, delete it" comment, which had
#   no corresponding code).
sub keep_running
{
	my ($anvil) = @_;
	
	# Check for jobs that were running and now exited.
	# NOTE(review): this guards on 'exists ...{processes}' but iterates 'jobs::handles' — looks
	#               intentional (processes being tracked implies jobs were started), but confirm.
	if ((not $anvil->data->{sys}{mapping_network}) && (exists $anvil->data->{processes}))
	{
		foreach my $job_uuid (sort keys %{$anvil->data->{jobs}{handles}})
		{
			my $running = $anvil->data->{jobs}{handles}{$job_uuid}->poll();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
				running                      => $running,
			}});
			
			# If it's not running, update the table to clear the 'job_picked_up_by' column.
			if (not $running)
			{
				my $exit_status = $anvil->data->{jobs}{handles}{$job_uuid}->exit_status();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
					job_uuid    => $job_uuid,
					exit_status => $exit_status,
				}});
				
				# Free up memory and drop the dead handle so we don't poll it again.
				$anvil->data->{jobs}{handles}{$job_uuid}->cleanup();
				delete $anvil->data->{jobs}{handles}{$job_uuid};
				
				$anvil->Job->clear({job_uuid => $job_uuid});
			}
		}
	}
	
	# If we're configured, write out the status JSON file. If we're not configured, Update hardware state files.
	my $configured = $anvil->System->check_if_configured;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	if ((not $anvil->data->{sys}{mapping_network}) && ($configured))
	{
		# Write out state information for all known Anvil! systems and the information from
		# unconfigured nods and DR hosts, using just database data (hence, fast enough to run
		# constantly).
		$anvil->System->generate_state_json({debug => 3});
	}
	else
	{
		# Run this to monitor the network in real time.
		update_state_file($anvil);
	}
	
	# Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
	run_jobs($anvil, 0) if not $anvil->data->{sys}{mapping_network};
	
	return(0);
}
|
|
|
|
# This will check for any jobs that aren't at 100%. For each found, if 'picked_up_by' is set, a check is made
# to see if the PID is still alive. If it isn't, or if 'picked_up_by' is not set, the appropriate tool is
# invoked to handle it. It also (re)writes the jobs.json file used by the web interface.
#
# Parameters;
# - $anvil   - The Anvil object.
# - $startup - When '1', this is the boot-time pass and only jobs whose status is 'anvil_startup' run.
#
# Fixes in this version;
# - The non-startup 'anvil_startup' skip-check compared '$say_status' (which had already been through
#   parse_banged_string and HTML processing, so it could no longer equal the raw value) instead of
#   '$job_status'. It now checks '$job_status', as the comment always said.
# - Two misleading log labels fixed; 'output_xml' (the path is JSON) and the single-quoted
#   'jobs::$job_uuid::started' key that never interpolated the UUID.
sub run_jobs
{
	my ($anvil, $startup) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { startup => $startup }});
	
	# This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's
	# changed on disk.
	$anvil->data->{sys}{jobs_running} = 0;
	
	# We'll also update the jobs.json file.
	my $jobs_file = "{\"jobs\":[\n";
	
	# Get a list of pending or incomplete jobs.
	my $ended_within = $startup ? 1 : 300;
	my $return       = $anvil->Database->get_jobs({ended_within => $ended_within});
	my $count        = @{$return};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		ended_within => $ended_within,
		'return'     => $return,
		count        => $count,
	}});
	foreach my $hash_ref (@{$return})
	{
		my $job_uuid            = $hash_ref->{job_uuid};
		my $job_command         = $hash_ref->{job_command};
		my $job_data            = $hash_ref->{job_data};
		my $job_picked_up_by    = $hash_ref->{job_picked_up_by};
		my $job_picked_up_at    = $hash_ref->{job_picked_up_at};
		my $job_updated         = $hash_ref->{job_updated};
		my $job_name            = $hash_ref->{job_name};
		my $job_progress        = $hash_ref->{job_progress};
		my $job_title           = $hash_ref->{job_title};
		my $job_description     = $hash_ref->{job_description};
		my $job_status          = $hash_ref->{job_status};
		my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
		my $updated_seconds_ago = $job_updated      ? (time - $job_updated)      : 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			job_uuid            => $job_uuid,
			job_command         => $job_command,
			job_data            => $job_data,
			job_picked_up_by    => $job_picked_up_by,
			job_picked_up_at    => $job_picked_up_at,
			job_updated         => $job_updated,
			job_name            => $job_name,
			job_progress        => $job_progress,
			job_title           => $job_title,
			job_description     => $job_description,
			job_status          => $job_status,
			started_seconds_ago => $started_seconds_ago,
			updated_seconds_ago => $updated_seconds_ago,
		}});
		
		# If this is a start-up call, only start jobs whose status is 'anvil_startup'.
		if (($startup) && ($job_status ne "anvil_startup"))
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => {
				job_uuid    => $job_uuid,
				job_command => $job_command,
			}});
			next;
		}
		
		# Any incomplete job blocks a self-restart of the daemon.
		if ($job_progress ne "100")
		{
			$anvil->data->{sys}{jobs_running} = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }});
		}
		
		# See if the job was picked up by a now-dead instance.
		if ($job_picked_up_by)
		{
			# Check if the PID is still active.
			$anvil->System->pids({ignore_me => 1});
			
			### TODO: Add a check to verify the job isn't hung.
			# Skip if this job is in progress.
			if (not exists $anvil->data->{pids}{$job_picked_up_by})
			{
				# If the job is done, just clear the 'job_picked_up_by' and be done.
				if ($job_progress ne "100")
				{
					# It's possible that the job updated to 100% and exited after we
					# gathered the job data, so we won't restart until we've seen it not
					# running and not at 100% after 5 loops.
					if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid}))
					{
						$anvil->data->{lost_job_count}{$job_uuid} = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
					}
					if ($anvil->data->{lost_job_count}{$job_uuid} > 5)
					{
						# The previous job is gone, but the job isn't finished. Start it again.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0007", variables => {
							command => $job_command,
							pid     => $job_picked_up_by,
							percent => $job_progress,
						}});
						
						# Clear some variables.
						$job_progress = 0;
						$job_status   = "message_0056";
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
							job_progress => $job_progress,
							job_status   => $job_status,
						}});
						
						# Clear the job.
						$anvil->Job->clear({debug => 2, job_uuid => $job_uuid});
						$anvil->data->{lost_job_count}{$job_uuid} = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
					}
					else
					{
						$anvil->data->{lost_job_count}{$job_uuid}++;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
					}
				}
				
				# Clear the PID
				$job_picked_up_by = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }});
			}
		}
		
		# Convert the double-banged strings into a proper message.
		my $say_title       = $job_title       ? $anvil->Words->parse_banged_string({key_string => $job_title})       : "";
		my $say_description = $job_description ? $anvil->Words->parse_banged_string({key_string => $job_description}) : "";
		my $say_status      = $job_status      ? $anvil->Words->parse_banged_string({key_string => $job_status})      : "";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			job_title       => $job_title,
			say_description => $say_description,
			say_status      => $say_status,
		}});
		
		# Make the status HTML friendly. Strip any embedded HTML then encode the text string.
		if ($say_status)
		{
			my $html_strip = HTML::Strip->new();
			$say_status    = $html_strip->parse($say_status);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
			
			# Now make the resulting text string HTML friendly
			my $text_to_html = HTML::FromText->new({
				urls  => 1,
				email => 1,
				lines => 1,
			});
			$say_status = $text_to_html->parse($say_status);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
		}
		
		# Add this to the jobs.json file
		my $json_string = to_json ({
			job_uuid            => $job_uuid,
			job_command         => $job_command,
			job_data            => $job_data,
			job_picked_up_at    => $job_picked_up_at,
			job_updated         => $job_updated,
			job_name            => $job_name,
			job_progress        => $job_progress,
			job_title           => $say_title,
			job_description     => $say_description,
			job_status          => $say_status,
			started_seconds_ago => $started_seconds_ago,
			updated_seconds_ago => $updated_seconds_ago,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { json_string => $json_string }});
		$jobs_file .= $json_string.",\n";
		
		# If the job is done, move on.
		next if $job_progress eq "100";
		next if $anvil->data->{switches}{'no-start'};
		
		# If 'startup' is set, we only care if 'job_status' is 'anvil_startup'. Note that this
		# must check the raw 'job_status'; 'say_status' has been through banged-string parsing
		# and HTML processing above and can no longer match.
		if ((not $startup) && ($job_status eq "anvil_startup"))
		{
			# Skip this, it will run next time anvil-daemon restarts.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => {
				command  => $job_command,
				job_uuid => $job_uuid,
			}});
			next;
		}
		
		# If the job is not running, start it.
		if (not $job_picked_up_by)
		{
			my $command = $job_command." --job-uuid ".$job_uuid;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }});
			
			# Have we started this job recently?
			if (exists $anvil->data->{jobs}{$job_uuid}{started})
			{
				my $last_start = time - $anvil->data->{jobs}{$job_uuid}{started};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { last_start => $last_start }});
				
				if ($last_start < 60)
				{
					# Skip, Started too recently.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0578", variables => {
						command    => $command,
						last_start => $last_start,
					}});
					next;
				}
			}
			
			# Start the job, appending '--job-uuid' to the command.
			($anvil->data->{jobs}{handles}{$job_uuid}, my $return_code) = $anvil->System->call({
				background  => 1,
				stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout",
				stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr",
				shell_call  => $command,
				source      => $THIS_FILE,
				line        => __LINE__,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
				return_code                  => $return_code,
			}});
			
			# Log the PID (the job should update the database).
			my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
			
			# Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute.
			$anvil->data->{jobs}{$job_uuid}{started} = time;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started} }});
		}
	}
	
	# Close the jobs file; drop the trailing comma left by the last entry.
	$jobs_file =~ s/,\n$/\n/ms;
	$jobs_file .= "]}\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { jobs_file => $jobs_file }});
	
	# Write the JSON file
	my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_json => $output_json }});
	$anvil->Storage->write_file({
		file      => $output_json,
		body      => $jobs_file,
		overwrite => 1,
		backup    => 0,
		mode      => "0644",
		user      => "apache",
		group     => "apache",
	});
	
	return(0);
}
|
|
|
|
# This calls 'anvil-update-states' which will scan the local machine's state (hardware and software) and
# record write it out to an HTML file
sub update_state_file
{
	my ($anvil) = @_;
	
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0480"});
	
	# Call anvil-update-states (without passing on our log switches).
	#my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'}.$anvil->Log->switches;
	my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});
	
	my ($states_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		states_output => $states_output,
		return_code   => $return_code,
	}});
	
	return(0);
}
|