|
|
|
#!/usr/bin/perl
|
|
|
|
#
|
|
|
|
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster
|
|
|
|
# nodes and DR hosts.
|
|
|
|
#
|
|
|
|
# Exit codes;
|
|
|
|
# 0 = Normal exit or md5sum of this program changed and it exited to reload.
|
|
|
|
# 1 = Not running as root.
|
|
|
|
# 2 = Unable to connect to any database, even after trying to initialize the local system.
|
|
|
|
#
|
|
|
|
# TODO:
|
|
|
|
# - Need to check what kind of machine this is and not prep the database unless it's a dashboard.
|
|
|
|
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs
|
|
|
|
# - Decide if holding before the main loop until 'systemctl is-system-running' returns 'running' is a good
|
|
|
|
# idea or not.
|
|
|
|
# - Write the status of this and the scancore daemon to /etc/anvil/anvil.motd and symlink it to /etc/motd.d/
|
|
|
|
# - Write a script that runs in crontab at UTC 17:00 that sends an email if Scancore or anvil-daemon are disabled.
|
|
|
|
# - Examine limits in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU=
|
|
|
|
# - Write a background program to scan the BCN and use OUI data to try and find / auto-configure PDUs and UPSes
|
|
|
|
# -
|
|
|
|
# - Increase DRBD's default timeout
|
|
|
|
# - Check for and enable persistent journald logging
|
|
|
|
#
|
|
|
|
# NOTE:
|
|
|
|
# - For later; 'reboot --force --force' immediately kills the OS, like disabling ACPI on EL6 and hitting the
|
|
|
|
# power button. Might be useful in ScanCore down the road.
|
|
|
|
#
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use Anvil::Tools;
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call being taking 'background', 'stderr_file' and 'stdout_file' paramters which, when set, used Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
use Proc::Simple;
|
|
|
|
#use Time::HiRes qw ( time sleep );
|
|
|
|
use JSON;
|
|
|
|
use HTML::Strip;
|
|
|
|
use HTML::FromText;
|
|
|
|
use Data::Dumper;
|
|
|
|
use Text::Diff;
|
|
|
|
|
|
|
|
# Work out this program's file name and the directory it was called from. '$0' only contains a '/' when the
# program was called with a path, so fall back to '$0' itself for the name, and to '.' for the directory,
# when there is no directory part to split off.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE    = $0 if not defined $THIS_FILE;

# The file name is quoted so that any regex metacharacters in it (like '.') are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory    = "." if not defined $running_directory;

# Expand a leading './' (or a bare '.') to the current working directory. A leading '..' is deliberately left
# alone; replacing only its first dot would produce a broken path.
if (($running_directory =~ /^\.(?:\/|$)/) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}
|
|
|
|
|
|
|
|
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
|
|
|
|
$| = 1;
|
|
|
|
|
|
|
|
# Prevent a discrepancy between UID/GID and EUID/EGID from throwing an error.
|
|
|
|
$< = $>;
|
|
|
|
$( = $);
|
|
|
|
|
|
|
|
# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
|
|
|
|
# in the loop as well to override defaults in code.
|
|
|
|
my $anvil = Anvil::Tools->new();
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call being taking 'background', 'stderr_file' and 'stdout_file' paramters which, when set, used Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
|
|
|
|
# This daemon must be run by 'root'. Either the real UID ($<) or the effective UID ($>) being '0' is enough
# to proceed; otherwise tell the user and exit with return code '1'.
if (not (($< == 0) || ($> == 0)))
{
	my $not_root_message = $anvil->Words->string({key => "error_0005"});
	print $not_root_message."\n";
	$anvil->nice_exit({exit_code => 1});
}
|
|
|
|
|
|
|
|
# If, for some reason, anvil.conf is lost, create it.
|
|
|
|
$anvil->System->_check_anvil_conf();
|
|
|
|
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like agine out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started it reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
# If dnf is running, hold.
|
|
|
|
$anvil->System->wait_on_dnf();
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like agine out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started it reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
|
|
|
|
# If we've got bonds, wait for them to be up. Then wait for NetworkManager to be up.
|
|
|
|
$anvil->Network->wait_on_nm_online({debug => 2});
|
|
|
|
$anvil->Network->wait_for_network({debug => 2});
|
|
|
|
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call being taking 'background', 'stderr_file' and 'stdout_file' paramters which, when set, used Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
# Make the first attempt to connect to the database(s), asking for the configuration check. Having no
# connection here is not fatal on its own; what happens next depends on the host type.
$anvil->Database->connect({
	check_if_configured => 1,
	check_for_resync    => 2,
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});

if (not $anvil->data->{sys}{database}{connections})
{
	if ($anvil->Get->host_type ne "striker")
	{
		# Not a dashboard, so we can't set up a database ourselves. A Striker can't initialize us
		# unless it can ssh into us, so make sure root login is enabled.
		# NOTE(review): 'chech_sshd' looks like a typo of 'check_sshd'. The definition is outside of
		#               the code reviewed here, so the name is left as it was - confirm.
		chech_sshd($anvil);
		
		# Tell the user we're waiting, then retry every ten seconds until a database is available.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0075"});
		while (not $anvil->data->{sys}{database}{connections})
		{
			sleep 10;
			
			check_network($anvil);
			$anvil->refresh();
			$anvil->Database->connect({
				check_if_configured => 1,
				check_for_resync    => 2,
			});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
			
			# Connected, the loop condition will end the wait.
			next if $anvil->data->{sys}{database}{connections};
			
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0439"});
		}
	}
	else
	{
		# This is a dashboard, so try to configure the local database and then connect again.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0201"});
		prep_database($anvil);
		
		$anvil->Database->connect({
			check_if_configured => 1,
			check_for_resync    => 2,
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Still nothing, report the error and exit with return code '2'.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
			$anvil->nice_exit({exit_code => 2});
		}
	}
}
|
|
|
|
|
|
|
|
# Read switches
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like agine out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started it reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
# Parse the command line switches this program understands.
$anvil->Get->switches({
	list => [qw(clear-mapping refresh-json run-once main-loop-only no-start startup-only)],
	man  => $THIS_FILE,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Asking for '--refresh-json' implies '--run-once', '--main-loop-only' and '--no-start'.
if ($anvil->data->{switches}{'refresh-json'})
{
	foreach my $implied_switch ("run-once", "main-loop-only", "no-start")
	{
		$anvil->data->{switches}{$implied_switch} = 1;
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"switches::run-once"       => $anvil->data->{switches}{'run-once'},
		"switches::main-loop-only" => $anvil->data->{switches}{'main-loop-only'},
		"switches::no-start"       => $anvil->data->{switches}{'no-start'},
	}});
}
|
|
|
|
|
|
|
|
# This is used to track initial checks / repairs of network issues.
|
|
|
|
$anvil->data->{sys}{network}{initial_checks} = 0;
|
|
|
|
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like agine out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started it reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
# We use this to delay starting jobs for a short time.
|
|
|
|
our $start_time = time;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }});
|
|
|
|
|
|
|
|
# There are some things we only want to run on (re)start and don't need to always run.
|
|
|
|
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};
|
|
|
|
|
|
|
|
# Calculate my sum so that we can exit if it changes later.
|
|
|
|
$anvil->Storage->record_md5sums;
|
|
|
|
|
|
|
|
# What time is it, Mr. Fox?
|
|
|
|
my $now_time = time;
|
|
|
|
|
|
|
|
# To avoid multiple dashboards running a network scan and OUI parse, the dashboard peer with the lowest
|
|
|
|
# host_uuid sets its daily checks to run now, and the other(s) will get a two hour delay.
|
|
|
|
my $delay = set_delay($anvil);
|
|
|
|
|
|
|
|
# Once a minute, we'll check the md5sums and see if we should restart.
|
|
|
|
# Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if not connection was established and the host is a Striker, to load a peer's backup, if it exists, and then start the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
# Intervals, in seconds, between each class of periodic check.
$anvil->data->{timing}{minute_checks}        = 60;
$anvil->data->{timing}{ten_minute_checks}    = 10 * 60;
$anvil->data->{timing}{daily_checks}         = 24 * 60 * 60;
$anvil->data->{timing}{repo_update_interval} = 24 * 60 * 60;

# When each class of check is next due. The minute and ten-minute checks are set one second in the past so
# they are due right away. The daily check is pushed out by '$delay' seconds.
$anvil->data->{timing}{next_minute_check}     = $now_time - 1;
$anvil->data->{timing}{next_ten_minute_check} = $now_time - 1;
$anvil->data->{timing}{next_daily_check}      = $now_time + $delay - 1;
$anvil->Log->variables({
	source => $THIS_FILE,
	line   => __LINE__,
	level  => 2,
	list   => {
		"s1:timing::minute_checks"         => $anvil->data->{timing}{minute_checks},
		"s2:timing::ten_minute_checks"     => $anvil->data->{timing}{ten_minute_checks},
		"s3:timing::daily_checks"          => $anvil->data->{timing}{daily_checks},
		"s4:timing::repo_update_interval"  => $anvil->data->{timing}{repo_update_interval},
		"s5:now_time"                      => $now_time,
		"s6:delay"                         => $delay,
		"s7:timing::next_minute_check"     => $anvil->data->{timing}{next_minute_check},
		"s8:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
		"s9:timing::next_daily_check"      => $anvil->data->{timing}{next_daily_check},
	},
});
|
|
|
|
|
|
|
|
# Disconnect. We'll reconnect inside the loop
|
|
|
|
$anvil->Database->disconnect();
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0203"});
|
|
|
|
|
|
|
|
# This will prevent restarting while jobs are running.
|
|
|
|
$anvil->data->{sys}{jobs_running} = 0;
|
|
|
|
|
|
|
|
# When we periodically check if system files have changed, we'll also ask Database->connect() to check if it
|
|
|
|
# needs to be configured or updated. This is done periodically as it is expensive to run on every loop.
|
|
|
|
my $check_if_database_is_configured = 0;
|
|
|
|
|
|
|
|
# These are the things we always want running.
|
|
|
|
while (1)
{
	# Start each pass from a clean slate; reload defaults and re-read the config.
	$anvil->refresh();
	
	# Recreate anvil.conf if it has gone missing.
	$anvil->System->_check_anvil_conf();
	
	# Connect to the database(s). The (expensive) configuration check is only requested on passes where
	# the flag was set.
	my $connect_parameters = {
		check_if_configured => $check_if_database_is_configured,
		check_for_resync    => 2,
	};
	$anvil->Database->connect($connect_parameters);
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
	
	# The flag has been used, clear it until it is next set.
	$check_if_database_is_configured = 0;
	
	# The regular and periodic tasks are only run when we have a database connection.
	my $connections = $anvil->data->{sys}{database}{connections};
	if ($connections)
	{
		keep_running($anvil);
		handle_periodic_tasks($anvil);
	}
	
	# When '--run-once' was used, this single pass is all we do.
	if ($anvil->data->{switches}{'run-once'})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0055"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Make sure we're not using too much RAM.
	check_ram($anvil);
	
	# Drop the database connection(s) and rest before the next pass.
	$anvil->Database->disconnect();
	sleep(2);
}
|
|
|
|
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
|
|
|
|
|
|
|
#############################################################################################################
|
|
|
|
# Functions #
|
|
|
|
#############################################################################################################
|
|
|
|
|
|
|
|
# If we're using too much ram, send an alert and exit.
|
|
|
|
# This asks System->check_ram_use() about this program's memory use. When a problem is reported and no job is
# part-way through running, an alert is registered and sent, the problem is logged, and the program exits
# with return code '0'. Otherwise, '0' is returned.
sub check_ram
{
	my ($anvil) = @_;
	
	# Problem 0 == ok, 1 == too much ram used, 2 == no pid found
	my ($problem, $ram_used) = $anvil->System->check_ram_use({program => $THIS_FILE});
	$anvil->Log->variables({
		source => $THIS_FILE,
		line   => __LINE__,
		level  => 2,
		list   => {
			problem  => $problem,
			ram_used => $anvil->Convert->add_commas({number => $ram_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}).")",
		},
	});
	
	# Nothing was reported, so there is nothing more to do.
	return(0) if not $problem;
	
	# If a job is part-way through running, hold off. Those jobs might be doing things (like OS updates
	# or file syncs) that could make anvil-daemon appear to be using more memory.
	$anvil->Database->get_jobs({debug => 2});
	foreach my $running_job_uuid (keys %{$anvil->data->{jobs}{running}})
	{
		my $command  = $anvil->data->{jobs}{running}{$running_job_uuid}{job_command};
		my $progress = $anvil->data->{jobs}{running}{$running_job_uuid}{job_progress};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			job_command  => $command,
			job_progress => $progress,
		}});
		
		# Jobs that haven't started or that have finished don't hold us up.
		next if (($progress == 100) || ($progress == 0));
		
		# This job is in progress, so warn and don't abort.
		$anvil->Log->entry({
			source    => $THIS_FILE,
			line      => __LINE__,
			level     => 1,
			priority  => "alert",
			key       => "warning_0139",
			variables => {
				job_command    => $command,
				job_progress   => $progress,
				ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
				ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
			},
		});
		return(0);
	}
	
	# No job is in the way; register and send an alert.
	$anvil->Alert->register({
		alert_level   => "notice",
		message       => "error_0357",
		variables     => {
			program        => $THIS_FILE,
			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
			ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
		},
		set_by        => $THIS_FILE,
		sort_position => 0,
	});
	$anvil->Email->send_alerts();
	
	# Record the same message in the log.
	$anvil->Log->entry({
		source    => $THIS_FILE,
		line      => __LINE__,
		level     => 0,
		priority  => "err",
		key       => "error_0357",
		variables => {
			program        => $THIS_FILE,
			ram_used       => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
			ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
		},
	});
	
	# Exit with RC0 so that systemctl restarts
	$anvil->nice_exit({exit_code => 0});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This decides if the local system will delay daily runs on start-up.
|
|
|
|
# This returns how many seconds the daily checks should be pushed back by. Hosts that are not Striker
# dashboards get '0'. A Striker gets '0' only when its own host UUID is the first of the configured database
# UUIDs (sorted as strings), and '7200' otherwise.
sub set_delay
{
	my ($anvil) = @_;
	
	my $delay     = 7200;
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	
	if ($host_type ne "striker")
	{
		# Not a dashboard, don't delay
		$delay = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
		return($delay);
	}
	
	# Only the first UUID in the sorted list matters; if that's us, we go now.
	my ($lowest_uuid) = sort {$a cmp $b} keys %{$anvil->data->{database}};
	if (defined $lowest_uuid)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			"sys::host_uuid" => $anvil->data->{sys}{host_uuid},
			uuid             => $lowest_uuid,
		}});
		if ($lowest_uuid eq $anvil->data->{sys}{host_uuid})
		{
			$delay = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { delay => $delay }});
		}
	}
	
	return($delay);
}
|
|
|
|
|
|
|
|
# This checks to see if it's time to see if the network is ok and, if the system has been up long enough,
|
|
|
|
# checks and tries to repair network issues.
|
|
|
|
# This runs the periodic network checks. Once the host's uptime is over two minutes it makes sure
# NetworkManager is running (first time through only) and calls check_firewall(). On every call it makes sure
# 'net.ipv4.ping_group_range' lets all users ping and, when a database connection is available, that this
# host has at least one entry in 'network_interfaces' (calling scan-network if not).
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Returns '0' in all cases.
sub check_network
{
	my ($anvil) = @_;
	
	### TODO: Remove this when EL8 support is dropped. This was an issue with the old ifcfg configured bonds
	# The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such, 
	# we'll start watching the network after the uptime is 2 minutes.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
	if ($uptime > 120)
	{
		# The first time through, make sure NetworkManager is running and start it if it isn't.
		if (not $anvil->data->{sys}{network}{initial_checks})
		{
			my $running = $anvil->System->check_daemon({daemon => "NetworkManager"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
			
			if (not $running)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }});
				my $return_code = $anvil->System->start_daemon({daemon => "NetworkManager"});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
			}
			
			# Record that the initial checks have been done so they aren't repeated.
			$anvil->data->{sys}{network}{initial_checks} = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
				"sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks}, 
			}});
		}
		
		check_firewall($anvil);
	}
	
	# Check that all users can ping.
	if (1)
	{
		my $shell_call = $anvil->data->{path}{exe}{sysctl}." net.ipv4.ping_group_range";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output }});
		
		# The dots are escaped so that only the literal sysctl key name is matched.
		if ($output =~ /net\.ipv4\.ping_group_range = (\d+)\t(\d+)$/)
		{
			my $lowest_uid  = $1;
			my $highest_uid = $2;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				lowest_uid  => $lowest_uid,
				highest_uid => $highest_uid, 
			}});
			
			if ($highest_uid < 2000)
			{
				# Tell the user we're enabling ping for all users.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0683"});
				
				# These use their own names so they don't shadow the variables above.
				my $set_shell_call = $anvil->data->{path}{exe}{sysctl}." -w net.ipv4.ping_group_range=\"0 2147483647\"";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { shell_call => $set_shell_call }});
				
				my ($set_output, $set_return_code) = $anvil->System->call({shell_call => $set_shell_call, source => $THIS_FILE, line => __LINE__});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { output => $set_output }});
			}
		}
	}
	
	# Check that there's at least one entry in 'network_interfaces' and, if not, call scan-network. This
	# function is also called while the daemon is still waiting for its first database connection, so the
	# database is only used when a connection exists.
	if ($anvil->data->{sys}{database}{connections})
	{
		my $query = "SELECT COUNT(*) FROM network_interfaces WHERE network_interface_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid).";";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
		
		# Make sure we got a row back before dereferencing it; dereferencing anything else is fatal
		# under 'use strict'.
		my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
		if ((ref($results) ne "ARRAY") or (ref($results->[0]) ne "ARRAY"))
		{
			# We can't tell how many interfaces are recorded, so leave it for the next pass.
			return(0);
		}
		
		my $count = $results->[0]->[0];
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { count => $count }});
		if (not $count)
		{
			# Run scan-network
			my $shell_call = $anvil->data->{path}{directories}{scan_agents}."/scan-network/scan-network".$anvil->Log->switches;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output,
				return_code => $return_code,
			}});
		}
	}
	
	return(0);
}
|
|
|
|
|
|
|
|
# This handles running tasks that only run on some loops.
|
|
|
|
sub handle_periodic_tasks
|
|
|
|
{
|
|
|
|
my ($anvil) = @_;
|
|
|
|
|
|
|
|
my $now_time = time;
|
|
|
|
my $host_type = $anvil->Get->host_type();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if not connection was established and the host is a Striker, to load a peer's backup, if it exists, and then start the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
"s1:now_time" => $now_time,
|
|
|
|
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
|
|
|
|
"s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
|
|
|
|
"s4:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
|
|
|
|
"s5:host_type" => $host_type,
|
|
|
|
}});
|
|
|
|
|
|
|
|
# Time to run once per minute tasks.
|
|
|
|
if ($now_time >= $anvil->data->{timing}{next_minute_check})
|
|
|
|
{
|
|
|
|
# Check the firewall needs to be updated.
|
|
|
|
check_network($anvil);
|
|
|
|
|
|
|
|
# Check to see if the PXE environment needs to be updated.
|
|
|
|
check_install_target($anvil);
|
|
|
|
|
|
|
|
# Check that the users we care about have ssh public keys and they're recorded in ssh_keys.
|
|
|
|
$anvil->System->check_ssh_keys({debug => 2});
|
|
|
|
|
|
|
|
$anvil->System->update_hosts({debug => 2});
|
|
|
|
|
|
|
|
# Check if the files on disk have changed. Even if it is time to check, don't if a job is
|
|
|
|
# running.
|
|
|
|
if ((not $anvil->data->{timing}{jobs_running}) && ($anvil->Storage->check_md5sums))
|
|
|
|
{
|
|
|
|
# NOTE: We exit with '0' to prevent systemctl from showing a scary red message.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Mark that we want to check the database config next time.
|
|
|
|
$check_if_database_is_configured = 1;
|
|
|
|
|
|
|
|
# Update the next check time.
|
|
|
|
$anvil->data->{timing}{next_minute_check} = $now_time + $anvil->data->{timing}{minute_checks};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
|
|
|
|
"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
|
|
|
|
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
|
|
|
|
}});
|
|
|
|
|
|
|
|
# Even when this runs, it should finish in under ten seconds so we don't need to background it.
|
|
|
|
my $shell_call = $anvil->data->{path}{exe}{'anvil-parse-fence-agents'}.$anvil->Log->switches;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
|
|
|
|
my ($parse_output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
parse_output => $parse_output,
|
|
|
|
return_code => $return_code,
|
|
|
|
}});
|
* Moved the fences_unified_metadata file from /tmp, which apache can not read, to /var/www/html/.
* Fixed a bug (well, made a work-around for an issue without a known reproducer) where, on some occasions, a record will end up in the public table without being copied into the history schema. When this happens, the next resync would crash out because the resync reads in the history table only. Now, when about to INSERT a record into the public schema during a resync, an explicit check is made to see if the record already exists. If it does, the INSERT is instead redirected to the history schema.
* Cleaned up the fence agent metadata when displaying to a user, converting the shell codes to underline a string with square brackets instead. We also now replace newlines with <br /> tags. Lastly, to help fence_azure_arm's metadata description to display cleanly, a check is made to format the table correctly.
* Began work on the Striker menu for handling fence device management
Signed-off-by: Digimer <digimer@alteeve.ca>
5 years ago
|
|
|
|
|
|
|
# Check shared files.
|
|
|
|
check_files($anvil);
|
|
|
|
|
|
|
|
# Check mail server config.
|
|
|
|
my $problem = $anvil->Email->check_config({debug => 3});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
|
|
|
|
|
|
|
|
# Check if anything is needed to be done in /mnt/shared.
|
|
|
|
check_incoming($anvil);
|
|
|
|
|
|
|
|
# Check for stale db_in_use states.
|
|
|
|
check_db_in_use_states($anvil);
|
|
|
|
|
|
|
|
# Do Striker-specific minute tasks
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
|
|
|
|
if ($host_type eq "striker")
|
|
|
|
{
|
|
|
|
# Look for duplicates if we're the primary DB.
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
"sys::database::primary_db" => $anvil->data->{sys}{database}{primary_db},
|
|
|
|
"Get->host_uuid" => $anvil->Get->host_uuid,
|
|
|
|
}});
|
|
|
|
if ($anvil->Get->host_uuid eq $anvil->data->{sys}{database}{primary_db})
|
|
|
|
{
|
|
|
|
$anvil->Database->_check_for_duplicates({debug => 2});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Something is causing broken manifests to be created. Until found, this removes them.
|
|
|
|
check_for_broken_manifests($anvil);
|
|
|
|
|
|
|
|
# This can take a while, but it's been optimized to minimize how long it takes to
|
|
|
|
# run. To be safe, we'll still background it.
|
|
|
|
my $shell_call = $anvil->data->{path}{exe}{'striker-get-screenshots'}.$anvil->Log->switches;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
|
|
|
|
|
|
|
|
my ($output, $return_code) = $anvil->System->call({
|
|
|
|
background => 1,
|
|
|
|
shell_call => $shell_call,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
output => $output,
|
|
|
|
return_code => $return_code,
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic so that, if no connection was established and the host is a Striker, a peer's backup is loaded, if it exists, and the local daemon is then started.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
# Now check to see if it's time to run less frequent tasks.
|
|
|
|
if ($now_time >= $anvil->data->{timing}{next_ten_minute_check})
|
|
|
|
{
|
|
|
|
my $host_type = $anvil->Get->host_type();
|
|
|
|
my $host_uuid = $anvil->Get->host_uuid();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
host_type => $host_type,
|
|
|
|
host_uuid => $host_uuid,
|
|
|
|
}});
|
|
|
|
|
|
|
|
# Are we a Striker and is there two or more connections? If so, evaluate if we should shut
|
|
|
|
# down our database.
|
|
|
|
if ($host_type eq "striker")
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic so that, if no connection was established and the host is a Striker, a peer's backup is loaded, if it exists, and the local daemon is then started.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
{
|
|
|
|
# If we're the active database, dump our database out and rsync it to our peers.
|
|
|
|
my $peers = keys %{$anvil->data->{database}};
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic so that, if no connection was established and the host is a Striker, a peer's backup is loaded, if it exists, and the local daemon is then started.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
my $connections = $anvil->data->{sys}{database}{connections};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
peers => $peers,
|
|
|
|
connections => $connections,
|
|
|
|
}});
|
|
|
|
if (exists $anvil->data->{cache}{database_handle}{$host_uuid})
|
|
|
|
{
|
|
|
|
# Verify that the database is up.
|
|
|
|
my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
|
|
|
|
if ($running)
|
|
|
|
{
|
|
|
|
# Backup our DB.
|
|
|
|
my $dump_file = $anvil->Database->backup_database({debug => 2});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
|
|
|
|
|
|
|
|
# Now rsync it to our peer(s)
|
|
|
|
foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
|
|
|
|
{
|
|
|
|
next if $this_host_uuid eq $host_uuid;
|
|
|
|
|
|
|
|
my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/";
|
|
|
|
my $password = $anvil->data->{database}{$this_host_uuid}{password};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
this_host_uuid => $this_host_uuid,
|
|
|
|
destination => $destination,
|
|
|
|
password => $anvil->Log->is_secure($password),
|
|
|
|
}});
|
|
|
|
|
|
|
|
my $start_time = time;
|
|
|
|
my $failed = $anvil->Storage->rsync({
|
|
|
|
debug => 3,
|
|
|
|
destination => $destination,
|
|
|
|
password => $password,
|
|
|
|
source => $dump_file,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
|
|
|
|
|
|
|
|
my $rsync_time = time - $start_time;
|
|
|
|
my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
|
|
|
|
my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
|
|
|
|
my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid});
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => {
|
|
|
|
file => $dump_file,
|
|
|
|
host_name => $target_name,
|
|
|
|
took => $rsync_time,
|
|
|
|
size => $size,
|
|
|
|
size_bytes => $size_bytes,
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic so that, if no connection was established and the host is a Striker, a peer's backup is loaded, if it exists, and the local daemon is then started.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
}
|
|
|
|
|
|
|
|
# Reap old db_in_use states over 6 hours old.
|
|
|
|
my $query = "DELETE FROM states WHERE state_name LIKE 'db_in_use%' AND modified_date < (SELECT now() - interval '6 hour');\n";
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
|
|
|
|
$anvil->Database->write({debug => 2, query => $query, source => $THIS_FILE, line => __LINE__});
|
|
|
|
|
|
|
|
# Update the next check time.
|
|
|
|
$anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
"s1:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks},
|
|
|
|
"s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
|
|
|
|
}});
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic so that, if no connection was established and the host is a Striker, a peer's backup is loaded, if it exists, and the local daemon is then started.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
}
|
|
|
|
|
|
|
|
# Now check to see if it's time to run daily tasks.
|
|
|
|
if ($now_time >= $anvil->data->{timing}{next_daily_check})
|
|
|
|
{
|
|
|
|
# Make sure ksm, ksmtuned and tuned are disabled.
|
|
|
|
foreach my $daemon ("ksm.service", "ksmtuned.service", "tuned.service")
|
|
|
|
{
|
|
|
|
my $status = $anvil->System->check_daemon({daemon => $daemon});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
daemon => $daemon,
|
|
|
|
status => $status,
|
|
|
|
}});
|
|
|
|
if ($status eq "1")
|
|
|
|
{
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0145", variables => { daemon => $daemon }});
|
|
|
|
$anvil->System->disable_daemon({
|
|
|
|
now => 1,
|
|
|
|
daemon => $daemon,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
### NOTE: We call it once/day, but this will also trigger on restart of anvil-daemon. As such, we
|
|
|
|
### don't use '--force' and let striker-manage-install-target skip the repo update if it happened
|
|
|
|
### recently enough.
|
|
|
|
if ($host_type eq "striker")
|
|
|
|
{
|
|
|
|
### TODO: This is here only to handle the period of time where we disabled postgres
|
|
|
|
### on boot. This should be removed sometime after 2022-08-01
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like aging out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started on reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
#$anvil->System->enable_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
|
|
|
|
|
|
|
|
# Record a job, don't call it directly. It takes too long to run.
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like aging out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started on reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
my $host_uuid = $anvil->Get->host_uuid();
|
|
|
|
my ($last_age_out, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::aged-out"});
|
|
|
|
my $time_since_last_age_out = $last_age_out =~ /^\d+$/ ? time - $last_age_out : 100000;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
's1:host_uuid' => $host_uuid,
|
|
|
|
's2:last_age_out' => $last_age_out,
|
|
|
|
's3:time_since_last_age_out' => $time_since_last_age_out,
|
|
|
|
}});
|
|
|
|
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like aging out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started on reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
# Run an age-out?
|
|
|
|
if ($time_since_last_age_out > 86400)
|
|
|
|
{
|
|
|
|
# Age out old data. This takes up to a minute.
|
|
|
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({
|
|
|
|
variable_name => "database::".$host_uuid."::aged-out",
|
|
|
|
variable_value => time,
|
|
|
|
variable_default => "0",
|
|
|
|
variable_description => "striker_0302",
|
|
|
|
variable_section => "database",
|
|
|
|
variable_source_uuid => "NULL",
|
|
|
|
variable_source_table => "",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
|
|
|
|
$anvil->Database->_age_out_data();
|
|
|
|
}
|
|
|
|
|
The main change in this commit deals with anvil-daemon startup. During OS updates, it would pick up the queued update job and run it while the other --no-db one was still running. This could become an issue for other tasks in the future, so updated anvil-daemon to not run any jobs for the first minute after startup. Also updated it to see if an OS update is underway (given how it can start mid-RPM update, before packages like kmod-drbd are ready to build). While doing this, implemented caching of daily tasks (like aging out data, archiving data, network scans, etc) to only run once per day, period. As it was before, they would always run on anvil-daemon startup, then wait 24 hours.
Note that work has started on reworking anvil-update-system, but it is incomplete (and broken) in this commit.
Signed-off-by: digimer <mkelly@alteeve.ca>
1 year ago
|
|
|
# Run an archive?
|
|
|
|
my ($last_archive, undef, undef) = $anvil->Database->read_variable({variable_name => "database::".$host_uuid."::archived"});
|
|
|
|
my $time_since_last_archive = $last_archive =~ /^\d+$/ ? time - $last_archive : 100000;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
's1:last_archive' => $last_archive,
|
|
|
|
's2:time_since_last_archive' => $time_since_last_archive,
|
|
|
|
}});
|
|
|
|
if ($time_since_last_archive > 86400)
|
|
|
|
{
|
|
|
|
# Archive old data
|
|
|
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({
|
|
|
|
variable_name => "database::".$host_uuid."::archived",
|
|
|
|
variable_value => time,
|
|
|
|
variable_default => "0",
|
|
|
|
variable_description => "striker_0303",
|
|
|
|
variable_section => "database",
|
|
|
|
variable_source_uuid => "NULL",
|
|
|
|
variable_source_table => "",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
|
|
|
|
$anvil->Database->archive_database();
|
|
|
|
}
|
|
|
|
|
|
|
|
# Run the install target update?
|
|
|
|
my ($last_mit, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target"});
|
|
|
|
my $time_since_last_mit = $last_mit =~ /^\d+$/ ? time - $last_mit : 100000;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
's1:last_mit' => $last_mit,
|
|
|
|
's2:time_since_last_mit' => $time_since_last_mit,
|
|
|
|
}});
|
|
|
|
if ($time_since_last_mit > 86400)
|
|
|
|
{
|
|
|
|
# Update the local install target data.
|
|
|
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({
|
|
|
|
variable_name => "jobs::last-ran::".$host_uuid."::manage-install-target",
|
|
|
|
variable_value => time,
|
|
|
|
variable_default => "0",
|
|
|
|
variable_description => "striker_0304",
|
|
|
|
variable_section => "jobs",
|
|
|
|
variable_source_uuid => "NULL",
|
|
|
|
variable_source_table => "",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
|
|
|
|
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
|
|
|
|
file => $THIS_FILE,
|
|
|
|
line => __LINE__,
|
|
|
|
job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches,
|
|
|
|
job_data => "",
|
|
|
|
job_name => "install-target::refresh",
|
|
|
|
job_title => "job_0015",
|
|
|
|
job_description => "job_0017",
|
|
|
|
job_progress => 0,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { job_uuid => $job_uuid }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Update the OUI data?
|
|
|
|
my ($last_parse_oui, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-parse-oui"});
|
|
|
|
my $time_since_last_parse_oui = $last_parse_oui =~ /^\d+$/ ? time - $last_parse_oui : 100000;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
's1:last_parse_oui' => $last_parse_oui,
|
|
|
|
's2:time_since_last_parse_oui' => $time_since_last_parse_oui,
|
|
|
|
}});
|
|
|
|
if ($time_since_last_parse_oui > 86400)
|
|
|
|
{
|
|
|
|
# Yup.
|
|
|
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({
|
|
|
|
variable_name => "jobs::last-ran::striker-parse-oui",
|
|
|
|
variable_value => time,
|
|
|
|
variable_default => "0",
|
|
|
|
variable_description => "striker_0305",
|
|
|
|
variable_section => "jobs",
|
|
|
|
variable_source_uuid => "NULL",
|
|
|
|
variable_source_table => "",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
|
|
|
|
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
|
|
|
|
file => $THIS_FILE,
|
|
|
|
line => __LINE__,
|
|
|
|
job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches,
|
|
|
|
job_data => "",
|
|
|
|
job_name => "oui-data::refresh",
|
|
|
|
job_title => "job_0064",
|
|
|
|
job_description => "job_0065",
|
|
|
|
job_progress => 0,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Scan the network?
|
|
|
|
my ($last_network_scan, undef, undef) = $anvil->Database->read_variable({variable_name => "jobs::last-ran::striker-scan-network"});
|
|
|
|
my $time_since_last_network_scan = $last_network_scan =~ /^\d+$/ ? time - $last_network_scan : 100000;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
's1:last_network_scan' => $last_network_scan,
|
|
|
|
's2:time_since_last_network_scan' => $time_since_last_network_scan,
|
|
|
|
}});
|
|
|
|
if ($time_since_last_parse_oui > 86400)
|
|
|
|
{
|
|
|
|
# Yup.
|
|
|
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({
|
|
|
|
variable_name => "jobs::last-ran::striker-scan-network",
|
|
|
|
variable_value => time,
|
|
|
|
variable_default => "0",
|
|
|
|
variable_description => "striker_0306",
|
|
|
|
variable_section => "jobs",
|
|
|
|
variable_source_uuid => "NULL",
|
|
|
|
variable_source_table => "",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
|
|
|
|
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
|
|
|
|
file => $THIS_FILE,
|
|
|
|
line => __LINE__,
|
|
|
|
job_command => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches,
|
|
|
|
job_data => "",
|
|
|
|
job_name => "scan-network::refresh",
|
|
|
|
job_title => "job_0066",
|
|
|
|
job_description => "job_0067",
|
|
|
|
job_progress => 0,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# Update the next check time.
|
|
|
|
$anvil->data->{timing}{next_daily_check} = $now_time + $anvil->data->{timing}{daily_checks};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
"s1:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
|
|
|
|
"s2:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
### TODO: Find the source of the problem and fix it properly.
# This looks for manifests whose 'manifest_name' is exactly '-anvil-' and deletes each one it finds from
# 'history.manifests' and then from 'manifests'. 
# 
# Parameters;
# - $anvil: The Anvil::Tools handle used for logging and database access.
# 
# Returns '0' in all cases.
sub check_for_broken_manifests
{
	my ($anvil) = @_;
	
	# Find the manifests carrying the placeholder name.
	my $query = "
SELECT 
    manifest_uuid 
FROM 
    manifests 
WHERE 
    manifest_name = '-anvil-'
;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	
	my $found = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $total = scalar(@{$found});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $found, 
		count   => $total, 
	}});
	
	# When nothing matched, the loop body simply never runs.
	foreach my $record (@{$found})
	{
		my $manifest_uuid = $record->[0];
		
		# Build one DELETE per table, history schema first.
		my $queries = [
			map { "DELETE FROM ".$_." WHERE manifest_uuid = ".$anvil->Database->quote($manifest_uuid).";" } ("history.manifests", "manifests")
		];
		
		# Log each statement before writing them.
		foreach my $statement (sort {$a cmp $b} @{$queries})
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0124", variables => { query => $statement }});
		}
		$anvil->Database->write({debug => 2, query => $queries, source => $THIS_FILE, line => __LINE__});
	}
	
	return(0);
}
|
|
|
|
|
|
|
|
### NOTE: This logic plays out in a slightly different way in Database->shutdown().
# This checks for stale 'db_in_use' states recorded against this host and deletes them. 
# 
# State names are parsed as 'db_in_use::<db_uuid>::<pid>', optionally followed by '::<caller>'. A state is
# deleted when its PID is not a key in 'pids' in the anvil data hash, which is read right after calling
# System->pids() (presumably that call is what fills it in; confirm against System->pids).
# 
# Parameters;
# - $anvil: The Anvil::Tools handle used for logging and database access.
# 
# Returns '0' in all cases.
sub check_db_in_use_states
{
	my ($anvil) = @_;
	
	# We only reap db_in_use entries for us.
	$anvil->System->pids({debug => 2});
	my $query = "
SELECT 
    state_uuid, 
    state_name, 
    state_note 
FROM 
    states 
WHERE 
    state_name LIKE 'db_in_use::%' 
AND 
    state_host_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	
	my $states = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $total  = scalar(@{$states});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $states, 
		count   => $total, 
	}});
	
	# With no rows returned, the loop body never runs.
	foreach my $record (@{$states})
	{
		my ($state_uuid, $state_name, $state_note) = @{$record}[0 .. 2];
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:state_uuid' => $state_uuid, 
			's2:state_name' => $state_name, 
			's3:state_note' => $state_note, 
		}});
		
		# Split the name into the database's UUID and whatever follows it.
		my $caller = "";
		my ($db_uuid, $state_pid) = ($state_name =~ /db_in_use::(.*?)::(.*)$/);
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:db_uuid'   => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")", 
			's2:state_pid' => $state_pid, 
		}});
		
		# If the remainder is '<pid>::<caller>', separate the two.
		if ($state_pid =~ /(\d+)::(.*)$/)
		{
			($state_pid, $caller) = ($1, $2);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:state_pid' => $state_pid, 
				's2:caller'    => $caller, 
			}});
		}
		
		# A PID that is still in the list is left alone.
		next if exists $anvil->data->{pids}{$state_pid};
		
		# Reap the 'db_in_use' state.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { state_name => $state_name }});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0140", variables => { 
			db       => $anvil->Get->host_name_from_uuid({host_uuid => $db_uuid})." (".$db_uuid.")", 
			pid      => $state_pid, 
			'caller' => $caller, 
		}});
		
		my $delete = "DELETE FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";";
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $delete }});
		$anvil->Database->write({debug => 2, query => $delete, source => $THIS_FILE, line => __LINE__});
	}
	
	return(0);
}
|
|
|
|
|
|
|
|
# This checks to see if any files in /mnt/shared need to be dealt with, like incorporating files in 
# /mnt/shared/incoming, etc. It does so by running 'anvil-manage-files --check' (with the current log 
# switches appended) in the foreground and logging the output and return code.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle used for logging and for making the system call.
# 
# Returns '0' in all cases; the called program's return code is logged, not acted on.
sub check_incoming
{
	my ($anvil) = @_;
	
	# Assemble the command line.
	my $program    = $anvil->data->{path}{exe}{'anvil-manage-files'};
	my $shell_call = $program." --check".$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	# Run it and record what came back.
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		output      => $output, 
		return_code => $return_code, 
	}});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This asks 'striker-manage-install-target --status --check --no-refresh' for the install target's state and
# stores the result in the 'install-target::enabled' variable for this host. The stored value is one of;
# - "disabled"    - The output contained 'status=0'
# - "enabled"     - The output contained 'status=1'
# - "unavailable" - No 'status=<digit>' was found, or the digit was something other than 0 or 1.
# 
# On anything other than a Striker, this returns without doing anything. This always returns '0'.
sub check_install_target
{
	my ($anvil) = @_;
	
	my $system_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { system_type => $system_type }});
	
	# Not a dashboard, nothing to do.
	return(0) if $system_type ne "striker";
	
	my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --status --check --no-refresh".$anvil->Log->switches});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output }});
	
	# Only the first line carrying 'status=<digit>' is used. Until one is seen, the status stays at the default.
	my $status = "unavailable";
	foreach my $line (split/\n/, $output)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		next if $line !~ /status=(\d)/;
		
		my $digit = $1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { digit => $digit }});
		
		# Any digit other than 0 or 1 leaves the status untouched.
		$status = $digit == 0 ? "disabled"
		        : $digit == 1 ? "enabled"
		        :               $status;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status => $status }});
		last;
	}
	
	# Record the status
	$anvil->Database->insert_or_update_variables({
		variable_name         => "install-target::enabled",
		variable_source_uuid  => $anvil->Get->host_uuid,
		variable_source_table => "hosts",
		variable_value        => $status,
		variable_default      => "unavailable",
		variable_description  => "striker_0110",
		variable_section      => "system",
	});
	
	return(0);
}
|
|
|
|
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call taking 'background', 'stderr_file' and 'stdout_file' parameters which, when set, use Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
# These are the tasks that don't need to run constantly. Per the original note, they typically run when the
# server starts up or when the daemon is restarted or reloaded. The steps run in a fixed order;
# 1. Firewall management
# 2. Database preparation
# 3. Boot-time tasks
# 4. SSH key checks
# 5. setuid wrapper checks
# 6. journald persistence check
# 7. sshd root login check
# 
# If the 'startup-only' switch is set, the program exits (exit code 0) once these are done. Otherwise, this
# returns '0'.
sub run_once
{
	my ($anvil) = @_;
	
	# Firewall first.
	$anvil->Network->manage_firewall();
	
	# Then make sure the database is ready.
	prep_database($anvil);
	
	# Boot-time tasks; the original note says these only run if we've just booted.
	boot_time_tasks($anvil);
	
	# SSH keys. 
	# NOTE: Per the original note, this is also run in the per-minute tasks, but it needs to run on boot too.
	$anvil->System->check_ssh_keys();
	
	# setuid wrappers.
	check_setuid_wrappers($anvil);
	
	# Persistent storage for journald.
	check_journald($anvil);
	
	# Make sure root can ssh.
	chech_sshd($anvil);
	
	# When asked to only do the startup tasks, we're done.
	$anvil->nice_exit({exit_code => 0}) if $anvil->data->{switches}{'startup-only'};
	
	return(0);
}
|
|
|
|
|
|
|
|
# This makes sure root is allowed to log in over SSH. If the 'sshd_config.d' directory exists and the
# 'path::configs::sshd_root_password' file does not, the file is written with 'PermitRootLogin yes' and
# 'sshd.service' is restarted. 
# 
# Nothing is done when the 'sshd_config.d' directory is missing (the original note says that, on EL8, the
# directory doesn't exist and root login is already enabled), or when the file already exists.
# 
# This always returns '0'.
sub chech_sshd
{
	my ($anvil) = @_;
	
	# No drop-in directory, nothing to do.
	my $config_directory = $anvil->data->{path}{directories}{'sshd_config.d'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'path::directories::sshd_config.d' => $config_directory }});
	return(0) if not -d $config_directory;
	
	# If the drop-in file is already there, we're done as well.
	my $root_login_file = $anvil->data->{path}{configs}{sshd_root_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'path::configs::sshd_root_password' => $root_login_file }});
	return(0) if -f $root_login_file;
	
	# Still here, so the file needs to be written out.
	my $body = "# This file was added to enable root login by password, which is needed while
# forming the Anvil! cluster. Once the cluster is formed, passwordless SSH
# should be enabled and you can disable this feature. Please remove during a
# maintanence window or after testing in a lab environment.
PermitRootLogin yes
";
	$anvil->Storage->write_file({
		debug  => 2, 
		secure => 0, 
		file   => $root_login_file, 
		body   => $body, 
		mode   => "0644", 
	});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "message_0418", variables => { file => $root_login_file }});
	
	# Restart the sshd service so the new file is read.
	my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart sshd.service";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({
		source => $THIS_FILE, 
		line   => __LINE__, 
		level  => 2, 
		list   => { output => $output, return_code => $return_code },
	});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This makes sure journald is configured for persistent storage. It does three things;
# 1. Reads journald.conf and looks for 'Storage=<value>' lines.
# 2. Creates the 'path::directories::journald' directory if it doesn't exist.
# 3. If no 'Storage=persistent' line was seen, rewrites journald.conf with 'Storage=persistent' and restarts
#    'systemd-journald.service'.
# 
# When rewriting, existing (non-persistent) 'Storage=' lines are replaced by a single 'Storage=persistent'
# line. If there are none, the line is inserted just above the first '#Storage=' line or, failing that,
# appended to the end of the file.
# 
# This always returns '0'.
sub check_journald
{
	my ($anvil) = @_;
	
	my $config_file = $anvil->data->{path}{configs}{'journald.conf'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::configs::journald.conf' => $config_file }});
	
	# Look at what the 'Storage' option is set to now, if it's set at all.
	my $persistent_found  = 0;
	my $change_storage    = 0;
	my $old_journald_conf = $anvil->Storage->read_file({file => $config_file});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_journald_conf => $old_journald_conf }});
	foreach my $line (split/\n/, $old_journald_conf)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		next if $line !~ /^Storage=(.*)$/;
		
		my $value = $1;
		if ($value eq "persistent")
		{
			# NOTE: The (misspelt) log key is kept as-is so that log output doesn't change.
			$persistent_found = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { peristent_seen => $persistent_found }});
		}
		else
		{
			# Set to something else, it'll need to be replaced.
			$change_storage = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { change_storage => $change_storage }});
		}
	}
	
	# Make sure the journald directory exists.
	my $journal_directory = $anvil->data->{path}{directories}{journald};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::directories::journald' => $journal_directory }});
	if (not -d $journal_directory)
	{
		$anvil->Storage->make_directory({debug => 2, directory => $journal_directory});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0248", variables => { directory => $journal_directory }});
	}
	
	# If persistent storage is already configured, there's nothing more to do.
	return(0) if $persistent_found;
	
	# Build the new config, one line at a time.
	my $storage_added = 0;
	my @new_lines     = ();
	foreach my $line (split/\n/, $old_journald_conf)
	{
		if (($line =~ /^Storage=/) && ($change_storage))
		{
			# An active, non-persistent setting. The first one is swapped for the persistent line, any 
			# others are dropped.
			if (not $storage_added)
			{
				$storage_added = 1;
				push @new_lines, "Storage=persistent";
			}
			next;
		}
		if (($line =~ /^#Storage=/) && (not $storage_added))
		{
			# Put our line just above the commented-out example, which is itself kept.
			$storage_added = 1;
			push @new_lines, "Storage=persistent";
		}
		push @new_lines, $line;
	}
	
	# If there was nowhere to put it, it goes on the end.
	push @new_lines, "Storage=persistent" if not $storage_added;
	
	my $new_journald_conf = join("", map { $_."\n" } @new_lines);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { new_journald_conf => $new_journald_conf }});
	
	$anvil->Storage->write_file({
		debug     => 3, 
		secure    => 0, 
		file      => $config_file, 
		body      => $new_journald_conf, 
		mode      => "0644", 
		overwrite => 1, 
	});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0013", variables => { file => $config_file }});
	
	# Restart the journald service so that the change takes effect.
	my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart systemd-journald.service";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({
		source => $THIS_FILE, 
		line   => __LINE__, 
		level  => 2, 
		list   => { output => $output, return_code => $return_code },
	});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This creates, as needed, the setuid wrappers used by striker-ui-api to make certain system calls. At this
# time, the only wrapper handled is 'call_striker-get-peer-data'.
# 
# The steps are;
# 1. Return if this host is not a Striker, or if the wrapper already exists.
# 2. Look up the UID and GID of 'admin'. If either can't be found, return without creating anything.
# 3. Write out '<wrapper>.c'; a small C program that calls setuid()/setgid() with the admin IDs and then 
#    execv()'s 'striker-get-peer-data'.
# 4. Compile it with gcc, then set the binary's owner to root:root and its mode to 4755.
# 
# Write and compile failures are logged as alerts ('error_0071' and 'error_0072', respectively).
# 
# This always returns '0'.
sub check_setuid_wrappers
{
	my ($anvil) = @_;
	
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		# Not a dashboard, setuid scripts aren't needed.
		return(0);
	}
	
	my $wrapper     = $anvil->data->{path}{exe}{'call_striker-get-peer-data'};
	my $source_file = $wrapper.".c";
	
	# Does the call_striker-get-peer-data wrapper exist yet?
	if (-e $wrapper)
	{
		# Exists, skipping.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0436", variables => { wrapper => $wrapper }});
		return(0);
	}
	
	# What is the admin user and group ID?
	my $admin_uid = getpwnam('admin');
	my $admin_gid = getgrnam('admin');
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		admin_uid => $admin_uid, 
		admin_gid => $admin_gid, 
	}});
	
	# Without both IDs, the wrapper can't be built. This used to be 'next if not ...', but there is no loop
	# in this function, so that would either die ("Can't "next" outside a loop block") or jump into a loop
	# in a caller. We return cleanly instead.
	if ((not $admin_uid) or (not $admin_gid))
	{
		return(0);
	}
	
	# Write the body out
	# NOTE(review): The generated C calls setuid() before setgid(). Once setuid() has dropped root, the 
	#               setgid() call presumably fails; confirm whether the order should be swapped.
	# NOTE(review): The K&R-style 'main(ac, av)' without '#include <unistd.h>' relies on implicit int and 
	#               implicit function declarations, which newer compilers may reject; confirm against the 
	#               gcc versions in use.
	my $call_striker_get_peer_data_body =  "#define REAL_PATH \"".$anvil->data->{path}{exe}{'striker-get-peer-data'}."\"\n";
	   $call_striker_get_peer_data_body .= "main(ac, av)\n";
	   $call_striker_get_peer_data_body .= "char **av;\n";
	   $call_striker_get_peer_data_body .= "{\n";
	   $call_striker_get_peer_data_body .= " setuid(".$admin_uid.");\n";
	   $call_striker_get_peer_data_body .= " setgid(".$admin_gid.");\n";
	   $call_striker_get_peer_data_body .= " execv(REAL_PATH, av);\n";
	   $call_striker_get_peer_data_body .= "}\n";
	my $error = $anvil->Storage->write_file({
		debug     => 3,
		file      => $source_file,
		body      => $call_striker_get_peer_data_body,
		mode      => '644',
		overwrite => 1,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { error => $error }});
	
	# If it wrote out, compile it.
	if (not -e $source_file)
	{
		# Failed to write.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0071", variables => { file => $source_file }});
		return(0);
	}
	
	# Compile it
	my ($output, $return_code) = $anvil->System->call({
		debug      => 3, 
		shell_call => $anvil->data->{path}{exe}{gcc}." -o ".$wrapper." ".$source_file,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		output      => $output, 
		return_code => $return_code, 
	}});
	
	# If it compiled, setuid it.
	if (not -e $wrapper)
	{
		# Something went wrong compiling it.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0072", variables => { file => $source_file }});
		return(0);
	}
	
	# The binary is owned by root with the setuid bit set, so that it starts as root and can switch to the
	# admin user itself.
	$anvil->Storage->change_owner({
		debug => 3, 
		path  => $wrapper,
		user  => 'root',
		group => 'root',
	});
	$anvil->Storage->change_mode({
		debug => 3, 
		path  => $wrapper,
		mode  => '4755',
	});
	
	return(0);
}
|
|
|
|
|
|
|
|
# Configure/update the firewall.
# 
# NOTE: This is currently a no-op that always returns '0'. The function already returned unconditionally as 
#       its first statement, which left the rest of the body unreachable. That dead code checked 
#       System->check_if_configured() and, if configured, called 'anvil-manage-firewall' with the logging
#       switches. It has been removed. 
# NOTE(review): run_once() calls Network->manage_firewall(), which presumably superseded this; confirm 
#               before re-enabling anything here. The function is kept as boot_time_tasks() still calls it.
sub check_firewall
{
	my ($anvil) = @_;
	
	return(0);
}
|
|
|
|
|
|
|
|
# This handles tasks that need to run on boot (if any). In order, it;
# 1. Reads this host's 'reboot::needed' variable and how long ago it was last changed.
# 2. If a reboot is flagged as needed and the uptime is shorter than the time since the flag changed, the 
#    host has rebooted since the request. The flag is cleared and any incomplete 'anvil-manage-power' job 
#    is set to 100%.
# 3. If no reboot is flagged, marks this host as 'online' and clears the 'system::stop_reason' variable.
# 4. Updates /etc/hosts, handles special cases, runs jobs (called with '1' as the second argument) and 
#    calls check_firewall().
# 
# This always returns '0'.
sub boot_time_tasks
{
	my ($anvil) = @_;
	
	# How long have we been up?
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
	
	# Now find out if a reboot is listed as needed and when it was last changed.
	my $query = "
SELECT
variable_value,
(SELECT extract(epoch from now()) - extract(epoch from modified_date)) AS changed_seconds_ago
FROM
variables
WHERE
variable_source_table = 'hosts'
AND
variable_source_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
AND
variable_name = 'reboot::needed'
;";
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
	
	my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
	my $count   = scalar(@{$results});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		results => $results, 
		count   => $count, 
	}});
	
	# These stay at '0' if the variable wasn't found.
	my $reboot_needed       = 0;
	my $changed_seconds_ago = 0;
	if ($count)
	{
		# Only the first row is used. The fractional part of the age, if any, is dropped.
		($reboot_needed, $changed_seconds_ago) = ($results->[0]->[0], $results->[0]->[1]);
		$changed_seconds_ago =~ s/^(\d+)\..*$/$1/;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			reboot_needed       => $reboot_needed, 
			changed_seconds_ago => $changed_seconds_ago, 
		}});
	}
	
	### TODO: This shouldn't be needed anymore. anvil-manage-power doesn't set the progress to '50' prior
	###       to reboot anymore.
	# If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was
	# set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to
	# Lisa Seelye (@thedoh) for this idea!
	# NOTE: The difference is only calculated for the log.
	my $difference = ($changed_seconds_ago - $uptime);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"s1:reboot_needed"       => $reboot_needed,
		"s2:changed_seconds_ago" => $changed_seconds_ago, 
		"s3:uptime"              => $uptime, 
		"s4:difference"          => $difference, 
	}});
	if (not $reboot_needed)
	{
		# No reboot pending; Update our status
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0572"});
		
		$anvil->Database->get_hosts({debug => 2});
		my $host_uuid = $anvil->Get->host_uuid({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_uuid => $host_uuid }});
		
		# Everything but the status is passed back in as it was loaded.
		my $host_data = $anvil->data->{hosts}{host_uuid}{$host_uuid};
		$anvil->Database->insert_or_update_hosts({
			debug       => 2, 
			host_ipmi   => $host_data->{host_ipmi}, 
			host_key    => $host_data->{host_key}, 
			host_name   => $host_data->{host_name}, 
			host_type   => $host_data->{host_type}, 
			host_uuid   => $host_uuid, 
			host_status => "online", 
		});
		
		# Make sure our stop reason is cleared.
		my $variable_uuid = $anvil->Database->insert_or_update_variables({
			variable_name         => 'system::stop_reason', 
			variable_value        => '', 
			variable_default      => '', 
			variable_description  => 'striker_0279', 
			variable_section      => 'system', 
			variable_source_uuid  => $host_uuid, 
			variable_source_table => 'hosts', 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
	}
	elsif ($uptime < $changed_seconds_ago)
	{
		# We've rebooted since the request was made. Clear the reboot request.
		$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
		
		# Check to see if there was a reboot job in progress. If so, finish it off.
		my $job_uuid = $anvil->Job->get_job_uuid({
			debug      => 2, 
			program    => "anvil-manage-power", 
			incomplete => 1,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
		
		if ($job_uuid)
		{
			# Update the percentage to '100' and then clear the old PID.
			my $date_time = $anvil->Get->date_and_time();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { date_time => $date_time }});
			
			$anvil->Job->update_progress({
				progress     => 100, 
				message      => "message_0064,!!date_and_time!".$date_time."!!", 
				job_uuid     => $job_uuid,
				picked_up_by => 0, 
			});
		}
	}
	
	# Make sure /etc/hosts is updated.
	$anvil->System->update_hosts({debug => 2});
	
	# This handles weird bits for things like bug work-arounds.
	handle_special_cases($anvil);
	
	# Now look for jobs that have a job status of 'anvil_startup'
	run_jobs($anvil, 1);
	
	# Check the firewall needs to be updated.
	check_firewall($anvil);
	
	return(0);
}
|
|
|
|
|
|
|
|
# This handles weird bits for things like bug work-arounds. The actual work is now done by the
# 'anvil-version-changes' program, which is called here with the active logging switches. Its output and
# return code are only logged (level 2). 
# 
# This always returns '0'.
sub handle_special_cases
{
	my ($anvil) = @_;
	
	# Hand off to 'anvil-version-changes'.
	my $program    = $anvil->data->{path}{exe}{'anvil-version-changes'};
	my $shell_call = $program.$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	
	my ($states_output, $return_code) = $anvil->System->call({
		debug      => 3, 
		shell_call => $shell_call, 
		source     => $THIS_FILE, 
		line       => __LINE__,
	});
	$anvil->Log->variables({
		source => $THIS_FILE, 
		line   => __LINE__, 
		level  => 2, 
		list   => { states_output => $states_output, return_code => $return_code },
	});
	
	return(0);
}
|
|
|
|
|
|
|
|
# Configure the local database, if needed. This only acts on Striker dashboards.
# 
# For each UUID under the 'database' hash, this looks for '<pgsql directory>/anvil_db_dump.<uuid>.sql'. Per
# the original note, if such a backup file exists, we're configured and possibly just off, so no preparation
# is needed. Then, on a Striker;
# - If no dump file was found, Database->configure_pgsql() is called.
# - Otherwise, if there are no database connections, the 'postgresql' daemon is started if it is installed 
#   but not running.
# 
# This always returns '0'.
sub prep_database
{
	my ($anvil) = @_;
	
	# Assume we need to prepare, until a dump file says otherwise.
	my $prep_database = 1;
	foreach my $uuid (keys %{$anvil->data->{database}})
	{
		# Build the path, collapsing any doubled-up slashes.
		(my $dump_file = $anvil->data->{path}{directories}{pgsql}."/anvil_db_dump.".$uuid.".sql") =~ s/\/\//\//g;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
		next if not -e $dump_file;
		
		# No need to prepare.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0665", variables => { file => $dump_file }});
		$prep_database = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { prep_database => $prep_database }});
	}
	
	# Only run this if we're a dashboard.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	return(0) if $host_type ne "striker";
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		prep_database                => $prep_database, 
		"sys::database::connections" => $anvil->data->{sys}{database}{connections}, 
	}});
	if ($prep_database)
	{
		# NOTE: An older version of this called the 'striker-prep-database' program instead. The original
		#       note says that failed once, so logging here is forced to level 2.
		$anvil->Database->configure_pgsql({debug => 2});
	}
	elsif (not $anvil->data->{sys}{database}{connections})
	{
		# Start the daemon locally, if needed.
		my $running = $anvil->System->check_daemon({daemon => "postgresql"});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { running => $running }});
		
		# A value of '2' means it's not installed, in which case there's nothing to do.
		if (($running != 2) && (not $running))
		{
			# Start it.
			my $return_code = $anvil->System->start_daemon({daemon => "postgresql"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
		}
	}
	
	return(0);
}
|
|
|
|
|
|
|
|
# These are tools that need to keep running.
|
|
|
|
sub keep_running
|
|
|
|
{
|
|
|
|
my ($anvil) = @_;
|
|
|
|
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call being taking 'background', 'stderr_file' and 'stdout_file' paramters which, when set, used Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
# Check for jobs that were running and now exited.
|
|
|
|
if (exists $anvil->data->{processes})
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call being taking 'background', 'stderr_file' and 'stdout_file' paramters which, when set, used Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
{
|
|
|
|
foreach my $job_uuid (%{$anvil->data->{jobs}{handles}})
|
|
|
|
{
|
|
|
|
# If it's not a handle, delete it.
|
|
|
|
my $running = $anvil->data->{jobs}{handles}{$job_uuid}->poll();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call taking 'background', 'stderr_file' and 'stdout_file' parameters which, when set, use Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
|
|
|
|
running => $running,
|
|
|
|
}});
|
|
|
|
|
|
|
|
# If it's not running, update the table to clear the 'job_picked_up_by' column.
|
|
|
|
if (not $running)
|
|
|
|
{
|
|
|
|
my $exit_status = $anvil->data->{jobs}{handles}{$job_uuid}->exit_status();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
|
|
|
|
job_uuid => $job_uuid,
|
|
|
|
exit_status => $exit_status,
|
|
|
|
}});
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call taking 'background', 'stderr_file' and 'stdout_file' parameters which, when set, use Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
|
|
|
|
# Free up memory
|
|
|
|
$anvil->data->{jobs}{handles}{$job_uuid}->cleanup();
|
|
|
|
|
|
|
|
$anvil->Job->clear({job_uuid => $job_uuid});
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call taking 'background', 'stderr_file' and 'stdout_file' parameters which, when set, use Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# If we're configured, write out the status JSON file. If we're not configured, Update hardware state files.
|
|
|
|
my $configured = $anvil->System->check_if_configured;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
|
|
|
|
if ($configured)
|
|
|
|
{
|
|
|
|
# Write out state information for all known Anvil! systems and the information from
|
|
|
|
# unconfigured nodes and DR hosts, using just database data (hence, fast enough to run
|
|
|
|
# constantly).
|
|
|
|
$anvil->System->generate_state_json({debug => 2});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
|
|
|
|
run_jobs($anvil, 0);
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call taking 'background', 'stderr_file' and 'stdout_file' parameters which, when set, use Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
7 years ago
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
# This checks for any jobs that aren't at 100%. For each one found, if 'job_picked_up_by' is set, a check is
# made to see if that PID is still alive. If it isn't, or if 'job_picked_up_by' is not set, the job's command
# is invoked as a background process to handle it. Every job that is looked at is also added to the
# 'status/jobs.json' file, which is rewritten at the end of each pass.
# 
# Parameters;
# - $anvil   = The Anvil::Tools handle.
# - $startup = When true, this is treated as the start-up call; The post-start hold is skipped and, on a
#              configured host, only jobs whose status is 'anvil_startup' are processed. When false, jobs
#              whose status is 'anvil_startup' are left for the next daemon start.
# 
# Returns '0' in all cases. If called (with '$startup' false) during the post-start hold, it returns before
# any job is examined and before 'jobs.json' is written.
sub run_jobs
{
	my ($anvil, $startup) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { startup => $startup }});
	
	# Don't start jobs for the first minute after the daemon started ('$start_time' is set outside this 
	# function). The start-up call is exempt.
	my $startup_delay = 60;
	if (not $startup)
	{
		my $time_since_start = time - $start_time;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			time_since_start => $time_since_start, 
			start_time       => $start_time, 
		}});
		if ($time_since_start < $startup_delay)
		{
			# Log that we'll start jobs in X seconds.
			my $will_start_in = $startup_delay - $time_since_start;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "message_0326", variables => { will_start_in => $will_start_in }});
			return(0);
		}
	}
	
	# This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's
	# changed on disk.
	$anvil->data->{sys}{jobs_running} = 0;
	
	# We'll also update the jobs.json file. Each job appends '<json>,\n' and the last trailing comma is 
	# stripped when the file is closed below.
	my $jobs_file = "{\"jobs\":[\n";
	
	# Get a list of pending or incomplete jobs.
	my $ended_within = $startup ? 1 : 300;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ended_within => $ended_within }});
	
	$anvil->Database->get_jobs({
		debug        => 2, 
		ended_within => $ended_within, 
	});
	foreach my $modified_date (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { modified_date => $modified_date }});
		foreach my $job_uuid (sort {$a cmp $b} keys %{$anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}})
		{
			# Reload the jobs so we get an updated view of them.
			$anvil->Database->get_jobs({
				debug        => 2, 
				ended_within => $ended_within, 
			});
			
			# The key lists being walked were read before this reload. If the reload no longer has a 
			# record for this job under this modified date, skip it; Otherwise the command built 
			# below would be assembled from undefined values and handed to the shell.
			my $job = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid};
			if ((ref($job) ne "HASH") or (not defined $job->{job_command}))
			{
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					job_uuid      => $job_uuid, 
					modified_date => $modified_date, 
				}});
				next;
			}
			
			# Collect the data. The short command is the job command up to the first white space.
			my $job_command         = $job->{job_command};
			my $short_command       = $job_command;
			   $short_command       =~ s/\s.*$//;
			my $job_data            = $job->{job_data};
			my $job_picked_up_by    = $job->{job_picked_up_by};
			my $job_picked_up_at    = $job->{job_picked_up_at};
			my $job_updated         = $job->{job_updated};
			my $job_name            = $job->{job_name};
			my $job_progress        = $job->{job_progress};
			my $job_title           = $job->{job_title};
			my $job_description     = $job->{job_description};
			my $job_status          = $job->{job_status};
			my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
			my $updated_seconds_ago = $job_updated      ? (time - $job_updated)      : 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's01:job_uuid'            => $job_uuid, 
				's02:job_command'         => $job_command, 
				's03:short_command'       => $short_command, 
				's04:job_data'            => $job_data, 
				's05:job_picked_up_by'    => $job_picked_up_by, 
				's06:job_picked_up_at'    => $job_picked_up_at, 
				's07:job_updated'         => $job_updated, 
				's08:job_name'            => $job_name, 
				's09:job_progress'        => $job_progress, 
				's10:job_title'           => $job_title, 
				's11:job_description'     => $job_description, 
				's12:job_status'          => $job_status, 
				's13:started_seconds_ago' => $started_seconds_ago, 
				's14:updated_seconds_ago' => $updated_seconds_ago, 
			}});
			
			# If we're not configured, we will only run the 'anvil-configure-host' job
			my $configured = $anvil->System->check_if_configured;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { configured => $configured }});
			
			if ((not $configured) && ($job_command !~ /anvil-configure-host/))
			{
				next;
			}
			
			# To minimize the chance of race conditions, any given command will be called only
			# once at a time. If two jobs of the same command exist, only one will be called.
			# NOTE(review): 'sys::started' is never cleared in this function. Unless it is reset 
			#               elsewhere between passes, an unfinished job would be skipped here on every 
			#               later pass - confirm against the caller.
			if ($job_progress != 100)
			{
				if (exists $anvil->data->{sys}{started}{$short_command})
				{
					# Skip it.
					my $started_job = $anvil->data->{sys}{started}{$short_command};
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0737", variables => { 
						started_job => $started_job,
						job_uuid    => $job_uuid, 
						command     => $short_command, 
					}});
					next;
				}
				$anvil->data->{sys}{started}{$short_command} = $job_uuid;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::started::${short_command}" => $anvil->data->{sys}{started}{$short_command} }});
			}
			
			# If this is a start-up call, only start jobs whose status is 'anvil_startup'.
			if (($startup) && ($configured) && ($job_status ne "anvil_startup"))
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => { 
					job_uuid    => $job_uuid, 
					job_command => $job_command, 
				}});
				next;
			}
			
			if ($job_progress == 100)
			{
				# This is a job that might have just completed, clear the started value.
				$anvil->data->{jobs}{$job_uuid}{started} = 0;
				$job_picked_up_at                        = 0;
				$job_picked_up_by                        = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					job_picked_up_at             => $job_picked_up_at, 
					job_picked_up_by             => $job_picked_up_by, 
					"jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started}, 
				}});
			}
			else
			{
				$anvil->data->{sys}{jobs_running} = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }});
			}
			
			# See if the job was picked up by a now-dead instance. (A finished job had its PID zeroed 
			# above, so only unfinished jobs get in here.)
			if ($job_picked_up_by)
			{
				# Check if the PID is still active.
				$anvil->System->pids({ignore_me => 1});
				
				### TODO: Add a check to verify the job isn't hung.
				# Skip if this job is in progress.
				if (not exists $anvil->data->{pids}{$job_picked_up_by})
				{
					# If the job is done, just clear the 'job_picked_up_by' and be done.
					if ($job_progress != 100)
					{
						# It's possible that the job updated to 100% and exited after
						# we gathered the job data, so we won't restart until we've
						# seen it not running and not at 100% after 5 loops.
						if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid}))
						{
							$anvil->data->{lost_job_count}{$job_uuid} = 0;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
						}
						if ($anvil->data->{lost_job_count}{$job_uuid} > 5)
						{
							# The previous job is gone, but the job isn't
							# finished. Start it again.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0007", variables => { 
								command => $job_command, 
								pid     => $job_picked_up_by, 
								percent => $job_progress, 
							}});
							
							# Clear some variables.
							$job_progress = 0;
							$job_status   = "message_0056";
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
								job_progress => $job_progress,
								job_status   => $job_status, 
							}});
							
							# Clear the job.
							$anvil->Job->clear({debug => 2, job_uuid => $job_uuid});
							$anvil->data->{lost_job_count}{$job_uuid} = 0;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
						}
						else
						{
							$anvil->data->{lost_job_count}{$job_uuid}++;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
						}
					}
					
					# Clear the PID
					# NOTE(review): This runs even when the lost-job count above has not yet 
					#               passed its threshold, which lets the start logic below 
					#               run on this same pass. Confirm that is intended given 
					#               the "after 5 loops" comment above.
					$job_picked_up_by = 0;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }});
				}
				elsif ($job_progress != 100)
				{
					# The job is running.
					$anvil->data->{jobs_started}{$short_command} = $job_uuid;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }});
				}
			}
			
			# Convert the double-banged strings into a proper message.
			my $say_title       = $job_title       ? $anvil->Words->parse_banged_string({key_string => $job_title})       : "";
			my $say_description = $job_description ? $anvil->Words->parse_banged_string({key_string => $job_description}) : "";
			my $say_status      = $job_status      ? $anvil->Words->parse_banged_string({key_string => $job_status})      : "";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				say_title       => $say_title, 
				say_description => $say_description, 
				say_status      => $say_status, 
			}});
			
			# Make the status HTML friendly. Strip any embedded HTML then encode the text string.
			if ($say_status)
			{
				my $html_strip = HTML::Strip->new();
				   $say_status = $html_strip->parse($say_status);
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
				
				# Now make the resulting text string HTML friendly
				my $text_to_html = HTML::FromText->new({
					urls  => 1,
					email => 1,
					lines => 1,
				});
				$say_status = $text_to_html->parse($say_status);
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
			}
			
			# Add this to the jobs.json file
			my $json_string = to_json ({
				job_uuid            => $job_uuid, 
				job_command         => $job_command, 
				job_data            => $job_data, 
				job_picked_up_at    => $job_picked_up_at, 
				job_updated         => $job_updated, 
				job_name            => $job_name, 
				job_progress        => $job_progress, 
				job_title           => $say_title, 
				job_description     => $say_description, 
				job_status          => $say_status, 
				started_seconds_ago => $started_seconds_ago, 
				updated_seconds_ago => $updated_seconds_ago, 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { json_string => $json_string }});
			$jobs_file .= $json_string.",\n";
			
			# If the job is done, move on.
			next if $job_progress == 100;
			next if $anvil->data->{switches}{'no-start'};
			
			# If this is not the start-up call, jobs whose 'job_status' is 'anvil_startup' are left 
			# alone. The raw status is tested here; '$say_status' has been translated and converted 
			# to HTML above, so it is display text and not the status token.
			if ((not $startup) && (defined $job_status) && ($job_status eq "anvil_startup"))
			{
				# Skip this, it will run next time anvil-daemon restarts.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => { 
					command  => $job_command,
					job_uuid => $job_uuid, 
				}});
				next;
			}
			
			# If the job is not running, and we've not started any other of the same command this
			# loop, start it.
			if (not $job_picked_up_by)
			{
				if (exists $anvil->data->{jobs_started}{$short_command})
				{
					# Is the job_uuid associated with this command done?
					my $started_job_uuid = $anvil->data->{jobs_started}{$short_command};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { started_job_uuid => $started_job_uuid }});
					
					if (exists $anvil->data->{jobs}{running}{$started_job_uuid})
					{
						# If the previously running job and this job have the same
						# UUID, it failed and needs to restart.
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							job_uuid                                           => $job_uuid, 
							started_job_uuid                                   => $started_job_uuid, 
							"jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress}, 
						}});
						# NOTE(review): Both branches below log with 'log_0741' but pass 
						#               different variables; One of the keys may be 
						#               wrong - check against the strings file.
						if ($started_job_uuid eq $job_uuid)
						{
							# We're restarting.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { 
								command  => $job_command, 
								job_uuid => $job_uuid, 
							}});
						}
						elsif ($anvil->data->{jobs}{running}{$started_job_uuid}{job_progress} != 100)
						{
							# Don't start it in this pass.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { 
								command        => $job_command, 
								this_job_uuid  => $job_uuid, 
								other_job_uuid => $started_job_uuid, 
							}});
							next;
						}
						else
						{
							# The previous job is done, blank its entry (the key itself 
							# is kept).
							$anvil->data->{jobs_started}{$short_command} = "";
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
								"jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command}, 
							}});
						}
					}
				}
				
				my $command = $job_command." --job-uuid ".$job_uuid;
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }});
				
				# Have we started this job recently?
				if (exists $anvil->data->{jobs}{$job_uuid}{started})
				{
					my $last_start = time - $anvil->data->{jobs}{$job_uuid}{started};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { last_start => $last_start }});
					
					if ($last_start < 60)
					{
						# Skip, Started too recently.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0578", variables => { 
							command    => $command,
							last_start => $last_start, 
						}});
						next;
					}
				}
				
				# Start the job, appending '--job-uuid' to the command. The returned handle is kept 
				# in 'jobs::handles::<job_uuid>'.
				($anvil->data->{jobs}{handles}{$job_uuid}, my $return_code) = $anvil->System->call({
					background  => 1, 
					stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout", 
					stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr", 
					shell_call  => $command, 
					source      => $THIS_FILE, 
					line        => __LINE__,
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, 
					return_code                  => $return_code, 
				}});
				
				# Log the PID (the job should update the database).
				# NOTE(review): This assumes the call above always returns a usable handle object; 
				#               confirm what 'System->call' returns if backgrounding fails.
				my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
				
				# Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute.
				$anvil->data->{jobs}{$job_uuid}{started} = time;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started} }});
				
				# Record that a job with this command has started
				$anvil->data->{jobs_started}{$short_command} = $job_uuid;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs_started::${short_command}" => $anvil->data->{jobs_started}{$short_command} }});
			}
		}
	}
	
	# Close the jobs file, dropping the comma left after the last job entry.
	$jobs_file =~ s/,\n$/\n/ms;
	$jobs_file .= "]}\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { jobs_file => $jobs_file }});
	
	# Write the JSON file
	my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_json => $output_json }});
	$anvil->Storage->write_file({
		file      => $output_json, 
		body      => $jobs_file, 
		overwrite => 1, 
		backup    => 0, 
		mode      => "0644", 
		user      => "striker-ui-api", 
		group     => "striker-ui-api", 
	});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This makes sure that every directory listed under 'path::directories::shared' exists with mode '0777', 
# creating any that are missing, and then calls 'Storage->check_files'.
# 
# Parameters;
# - $anvil = The Anvil::Tools handle.
# 
# Returns '0' in all cases.
sub check_files
{
	my ($anvil) = @_;
	
	# Walk the shared directories in name order.
	my $shared_directories = $anvil->data->{path}{directories}{shared};
	foreach my $share_name (sort {$a cmp $b} keys %{$shared_directories})
	{
		my $share_path = $shared_directories->{$share_name};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			target    => $share_name, 
			directory => $share_path, 
		}});
		
		if (-e $share_path)
		{
			# It exists already, so all that's left is to verify the mode and fix it if needed.
			$anvil->Storage->get_file_stats({file_path => $share_path});
			
			my $current_mode = $anvil->data->{file_stat}{$share_path}{unix_mode};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				"file_stat::${share_path}::unix_mode" => $current_mode, 
			}});
			if ($current_mode !~ /0777$/)
			{
				$anvil->Storage->change_mode({
					debug => 2, 
					path  => $share_path, 
					mode  => "0777"
				});
			}
			next;
		}
		
		# The path is re-tested before creating it; If it has appeared in the meantime, leave it be.
		next if -e $shared_directories->{$share_name};
		
		# It's missing, create it.
		my $failed = $anvil->Storage->make_directory({
			directory => $share_path, 
			user      => "striker-ui-api", 
			group     => "striker-ui-api", 
			mode      => "0777", 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { failed => $failed }});
		
		if (not $failed)
		{
			# Created successfully.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0255", variables => { 
				directory => $share_path, 
			}});
		}
		else
		{
			# The create failed, raise an alert.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0254", variables => { 
				directory => $share_path, 
			}});
		}
	}
	
	# Look for files on our system that are in file_locations. If they're shown as ready, make sure
	# they're there. If they're marked as not ready, see if they now are.
	$anvil->Storage->check_files({debug => 2});
	
	return(0);
}
|