|
|
|
#!/usr/bin/perl
|
|
|
|
#
|
|
|
|
# This is the master daemon that manages all periodically run processes on Striker dashboards, Anvil! cluster
|
|
|
|
# nodes and DR hosts.
|
|
|
|
#
|
|
|
|
# Exit codes;
|
|
|
|
# 0 = Normal exit or md5sum of this program changed and it exited to reload.
|
|
|
|
# 1 = Not running as root.
|
|
|
|
# 2 = Unable to connect to any database, even after trying to initialize the local system.
|
|
|
|
#
|
|
|
|
# TODO:
|
|
|
|
# - Need to check what kind of machine this is and not prep the database unless it's a dashboard.
|
|
|
|
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs
|
|
|
|
# - Decide if holding before the main loop until 'systemctl is-system-running' returns 'running' is a good
|
|
|
|
# idea or not.
|
|
|
|
# - Write the status of this and the scancore daemon to /etc/anvil/anvil.motd and symlink it to /etc/motd.d/
|
|
|
|
# - Write a script that runs in crontab at UTC 17:00 that sends an email if Scancore or anvil-daemon are disabled.
|
|
|
|
# - Examine limits in: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LimitCPU=
|
|
|
|
# - Write a background program that scans the BCN and uses OUI data to try to find / auto-configure PDUs and UPSes
|
|
|
|
# -
|
|
|
|
# - Increase DRBD's default timeout
|
|
|
|
# - Check for and enable persistent journald logging
|
|
|
|
#
|
|
|
|
# NOTE:
|
|
|
|
# - For later; 'reboot --force --force' immediately kills the OS, like disabling ACPI on EL6 and hitting the
|
|
|
|
# power button. Might be useful in ScanCore down the road.
|
|
|
|
#
|
|
|
|
# Switches:
|
|
|
|
#
|
|
|
|
# --main-loop-only
|
|
|
|
#
|
|
|
|
# This skips the one-time, start-up tasks and just goes into the main-loop,
|
|
|
|
#
|
|
|
|
# --no-start
|
|
|
|
#
|
|
|
|
# This will prevent any pending jobs from being picked up and started in this run. Note that other job checks will still happen.
|
|
|
|
#
|
|
|
|
# --refresh-json
|
|
|
|
#
|
|
|
|
# This just updates the JSON files used by the web interface. It is the same as '--run-once --main-loop-only --no-start'
|
|
|
|
#
|
|
|
|
# --run-once
|
|
|
|
#
|
|
|
|
# This will tell the program to exit after running the main loop once.
|
|
|
|
#
|
|
|
|
# --startup-only
|
|
|
|
#
|
|
|
|
# This will tell the program to exit after running the start up tasks, so the main loop won't run.
|
|
|
|
#
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use Anvil::Tools;
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call being taking 'background', 'stderr_file' and 'stdout_file' paramters which, when set, used Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
6 years ago
|
|
|
use Proc::Simple;
|
|
|
|
#use Time::HiRes qw ( time sleep );
|
|
|
|
use JSON;
|
|
|
|
use HTML::Strip;
|
|
|
|
use HTML::FromText;
|
|
|
|
use Data::Dumper;
|
|
|
|
use Text::Diff;
|
|
|
|
|
|
|
|
# Work out the name of this program and the directory it was started from.
#
# '$0' may have no directory component at all (for example when started as 'perl anvil-daemon'). In that case
# the first regex does not match, so the whole of '$0' is used as the file name and the directory is taken to
# be the current one. Without this fallback, '$THIS_FILE' would be undefined everywhere it is later used.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ".";
if ((defined $THIS_FILE) && ($THIS_FILE ne ""))
{
	# The file name is quoted with \Q...\E so that characters like '.' in it are matched literally.
	$running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
	$running_directory = "" if not defined $running_directory;
}
else
{
	$THIS_FILE = $0;
}

# Convert a directory relative to the current one into a full path, when the shell told us where we are.
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	if ($running_directory =~ /^\.(?:\/|$)/)
	{
		# A lone '.' or a leading './'; swap the dot for the working directory.
		$running_directory =~ s/^\./$ENV{PWD}/;
	}
	else
	{
		# Something like '../sbin' or '.hidden/bin'; the leading dot is part of a name, so the working
		# directory is prefixed instead of substituted.
		$running_directory = $ENV{PWD}."/".$running_directory;
	}
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

# Prevent a discrepancy between UID/GID and EUID/EGID from throwing an error. The real IDs are set to match
# the effective ones.
$< = $>;
$( = $);
|
|
|
|
|
|
|
|
# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main loop. Use the Log methods
#       in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new();

# This daemon has to run as 'root'. '$<' is the real UID and '$>' is the effective UID; if neither of them is
# 0, tell the user and exit with code 1.
if (not (($< == 0) or ($> == 0)))
{
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}
|
|
|
|
|
|
|
|
# If, for some reason, anvil.conf is lost, create it.
$anvil->System->_check_anvil_conf();

# Connect to the database(s). If we have no connections, what happens next depends on the host type (see
# below).
$anvil->Database->connect({check_if_configured => 1, check_for_resync => 1});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});

unless ($anvil->data->{sys}{database}{connections})
{
	if ($anvil->Get->host_type ne "striker")
	{
		# Not a dashboard, so there is no local database to set up. Check the network, re-read the
		# configuration and retry the connection every ten seconds until a database is available.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0075"});

		while (not $anvil->data->{sys}{database}{connections})
		{
			sleep 10;

			check_network($anvil);
			$anvil->refresh();
			$anvil->Database->connect({check_if_configured => 1, check_for_resync => 1});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
			unless ($anvil->data->{sys}{database}{connections})
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0439"});
			}
		}
	}
	else
	{
		# This is a dashboard; try to configure the local database, then connect again.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0201"});
		prep_database($anvil);

		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0132"});
		unless ($anvil->data->{sys}{database}{connections})
		{
			# Still nothing; log the error and exit with code 2.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
			$anvil->nice_exit({exit_code => 2});
		}
	}
}
|
|
|
|
|
|
|
|
# Read switches. Each switch this program knows about gets a default value first, then Get->switches is
# called to read what the user passed.
foreach my $switch ('main-loop-only', 'no-start', 'run-once', 'startup-only')
{
	$anvil->data->{switches}{$switch} = 0;
}
$anvil->data->{switches}{'refresh-json'} = "";
$anvil->Get->switches;

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# '--refresh-json' is the same as '--run-once --main-loop-only --no-start', so turn those three on.
if ($anvil->data->{switches}{'refresh-json'})
{
	foreach my $implied_switch ('run-once', 'main-loop-only', 'no-start')
	{
		$anvil->data->{switches}{$implied_switch} = 1;
	}
}

# This is used to track initial checks / repairs of network issues.
$anvil->data->{sys}{network}{initial_checks} = 0;

# There are some things we only want to run on (re)start and don't need to always run.
if (not $anvil->data->{switches}{'main-loop-only'})
{
	run_once($anvil);
}

# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;
|
|
|
|
|
|
|
|
# What time is it, Mr. Fox?
my $now_time = time;

# To avoid multiple dashboards running a network scan and OUI parse, the dashboard peer with the lowest
# host_uuid sets its daily checks to run now, and the other(s) will get a two hour delay.
my $delay = set_delay($anvil);

# How often, in seconds, each class of periodic task runs.
# - Once a minute, we'll check the md5sums and see if we should restart.
# - Once a day, we'll refresh an Install Target's RPM repository (has no effect on non-Striker dashboards).
@{$anvil->data->{timing}}{qw(minute_checks ten_minute_checks daily_checks repo_update_interval)} = (60, 600, 86400, 86400);

# The "next run" times for the minute and ten-minute tasks are set one second in the past. The daily tasks
# are pushed back by the start-up delay.
foreach my $next_check ("next_minute_check", "next_ten_minute_check")
{
	$anvil->data->{timing}{$next_check} = $now_time - 1;
}
$anvil->data->{timing}{next_daily_check} = $now_time + $delay - 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	"s1:timing::minute_checks"         => $anvil->data->{timing}{minute_checks},
	"s2:timing::ten_minute_checks"     => $anvil->data->{timing}{ten_minute_checks},
	"s3:timing::daily_checks"          => $anvil->data->{timing}{daily_checks},
	"s4:timing::repo_update_interval"  => $anvil->data->{timing}{repo_update_interval},
	"s5:now_time"                      => $now_time,
	"s6:delay"                         => $delay,
	"s7:timing::next_minute_check"     => $anvil->data->{timing}{next_minute_check},
	"s8:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
	"s9:timing::next_daily_check"      => $anvil->data->{timing}{next_daily_check},
}});
|
|
|
|
|
|
|
|
# Disconnect. We'll reconnect inside the loop.
$anvil->Database->disconnect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0203"});

# This will prevent restarting while jobs are running.
$anvil->data->{sys}{jobs_running} = 0;

# When we periodically check if system files have changed, we'll also ask Database->connect() to check if it
# needs to be configured or updated. This is done periodically as it is expensive to run on every loop.
my $check_if_database_is_configured = 0;

# These are the things we always want running.
while(1)
{
	# Reload defaults, re-read the config, recreate anvil.conf if it has gone missing, and then connect to
	# the database(s).
	$anvil->refresh();
	$anvil->System->_check_anvil_conf();
	$anvil->Database->connect({check_if_configured => $check_if_database_is_configured, check_for_resync => 1});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});

	# The configuration check was just requested (or wasn't needed); don't ask for it again until the
	# periodic tasks set the flag.
	$check_if_database_is_configured = 0;

	# If this host is mapping the network, we'll skip a lot of stuff. check_if_mapping() clears the flag
	# itself once it has been set for longer than its expiry age.
	$anvil->data->{sys}{mapping_network} = check_if_mapping($anvil);

	if (not $anvil->data->{sys}{database}{connections})
	{
		# No databases available, we'll update the state file in case this host is having its network
		# mapped and the interface used to talk to the databases went down. That's all we can do
		# though.
		update_state_file($anvil);
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, key => "log_0202"});
	}
	else
	{
		# Run the normal tasks, then the periodic ones unless the network is being mapped.
		keep_running($anvil);
		unless ($anvil->data->{sys}{mapping_network})
		{
			handle_periodic_tasks($anvil);
		}
	}

	# Exit if 'run-once' selected.
	if ($anvil->data->{switches}{'run-once'})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0055"});
		$anvil->nice_exit({exit_code => 0});
	}

	# Disconnect from the database(s) and sleep now.
	$anvil->Database->disconnect();
	sleep(2);
}

$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
|
|
|
|
|
|
|
#############################################################################################################
|
|
|
|
# Functions #
|
|
|
|
#############################################################################################################
|
|
|
|
|
|
|
|
# Check to see if we're mapping the network on this host.
#
# When at least one database is connected, this reads the 'config::map_network' variable stored against this
# host in the 'hosts' table. If the variable is set, it is either cleared (when it is at least a day old, or
# when the 'clear-mapping' switch is set) or 'sys::mapping_network' is set to '1' and any cached SSH
# connections are closed.
#
# Returns 'sys::mapping_network'; '1' if this host is mapping its network, '0' otherwise.
sub check_if_mapping
{
	my ($anvil) = @_;

	$anvil->data->{sys}{mapping_network} = 0;
	if ($anvil->data->{sys}{database}{connections})
	{
		# We'll run for a day (should be cancelled by the program when the user's done, so this
		# shouldn't fire in practice).
		my $maximum_age = 86400;
		my $current_age = 0;
		my ($flag_value, $flag_uuid, $flag_mtime, $flag_modified_date) = $anvil->Database->read_variable({
			debug                 => 3,
			variable_name         => "config::map_network",
			variable_source_table => "hosts",
			variable_source_uuid  => $anvil->data->{sys}{host_uuid},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			's1:map_network_value'         => $flag_value,
			's2:map_network_mtime'         => $flag_mtime,
			's3:map_network_modified_date' => $flag_modified_date,
			's4:map_network_uuid'          => $flag_uuid,
		}});

		# The age can only be worked out if the variable exists.
		if ($flag_uuid)
		{
			$current_age = time - $flag_mtime;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { map_network_age => $current_age }});
		}

		if ($flag_value)
		{
			# Make sure the switch is defined before it is tested.
			if (not defined $anvil->data->{switches}{'clear-mapping'})
			{
				$anvil->data->{switches}{'clear-mapping'} = "";
			}

			my $keep_mapping = (($current_age < $maximum_age) and (not $anvil->data->{switches}{'clear-mapping'})) ? 1 : 0;
			if ($keep_mapping)
			{
				# Mark it so we only track the network.
				my $say_age = $anvil->Convert->add_commas({number => $maximum_age});
				my $timeout = $anvil->Convert->add_commas({number => ($maximum_age - $current_age)});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0471", variables => {
					age     => $say_age,
					timeout => $timeout,
				}});

				$anvil->data->{sys}{mapping_network} = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::mapping_network" => $anvil->data->{sys}{mapping_network} }});

				# Close any open ssh connections and drop them from the cache.
				foreach my $cache_key (keys %{$anvil->data->{cache}{ssh_fh}})
				{
					my $handle = $anvil->data->{cache}{ssh_fh}{$cache_key};
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
						ssh_fh_key => $cache_key,
						ssh_fh     => $handle,
					}});
					# Only Net::OpenSSH objects are disconnected; anything else is just
					# removed from the cache.
					if ($handle =~ /^Net::OpenSSH/)
					{
						$handle->disconnect();
					}
					delete $anvil->data->{cache}{ssh_fh}{$cache_key};
				}
			}
			else
			{
				# Too old, or the user asked for it to be cleared. Clear it.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0470"});
				$anvil->Database->insert_or_update_variables({
					debug             => 3,
					variable_value    => 0,
					variable_uuid     => $flag_uuid,
					update_value_only => 1,
				});
			}
		}
	}

	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::mapping_network" => $anvil->data->{sys}{mapping_network} }});
	return($anvil->data->{sys}{mapping_network});
}
|
|
|
|
|
|
|
|
# This decides if the local system will delay daily runs on start-up.
#
# To stop every Striker dashboard from running the daily tasks at the same time, only the Striker whose host
# UUID sorts first among the configured databases ('database::<uuid>') runs them right away. Hosts that are
# not Strikers never delay.
#
# Returns the delay in seconds; '0' for no delay, '7200' (two hours) otherwise.
sub set_delay
{
	my ($anvil) = @_;

	my $delay = 7200;
	my $type  = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { type => $type }});
	if ($type eq "striker")
	{
		# 'keys' returns hash keys in an unpredictable order that differs between processes, so the
		# UUIDs are sorted before the first is taken. Without the sort, peers could disagree about who
		# is first, and all of them (or none of them) would run the daily checks immediately.
		my ($lowest_uuid) = sort {$a cmp $b} keys %{$anvil->data->{database}};
		if (defined $lowest_uuid)
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				"sys::host_uuid" => $anvil->data->{sys}{host_uuid},
				uuid             => $lowest_uuid,
			}});
			if ($lowest_uuid eq $anvil->data->{sys}{host_uuid})
			{
				# We're first, so we don't wait.
				$delay = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
			}
		}
	}
	else
	{
		# Not a dashboard, don't delay
		$delay = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { delay => $delay }});
	}

	return($delay);
}
|
|
|
|
|
|
|
|
# This checks the network once the system has been up long enough. On the first check, it makes sure the
# NetworkManager daemon is running, starting it if needed. On every check it then calls check_firewall().
#
# Always returns '0'.
sub check_network
{
	my ($anvil) = @_;

	# The network sometimes doesn't come up, but we don't want to try recovering it too soon. As such,
	# nothing is done until the uptime is over 2 minutes.
	my $uptime = $anvil->Get->uptime;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
	if (not ($uptime > 120))
	{
		return(0);
	}

	if ($anvil->data->{sys}{network}{initial_checks})
	{
		### NOTE: This is constantly trying to "fix" healthy bonds, without a known way to trigger it
		###       to debug. As such, disabling for now.
		#$anvil->Network->check_network({heal => "down_only"});
	}
	else
	{
		# First pass; make sure NetworkManager is running.
		my $daemon_running = $anvil->System->check_daemon({daemon => "NetworkManager"});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $daemon_running }});

		unless ($daemon_running)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0250", variables => { daemon => "NetworkManager" }});
			my $start_result = $anvil->System->start_daemon({daemon => "NetworkManager"});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $start_result }});
		}

		#$anvil->Network->check_network({heal => "all"});

		# Record that the initial checks are done so this branch isn't run again.
		$anvil->data->{sys}{network}{initial_checks} = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			"sys::network::initial_checks" => $anvil->data->{sys}{network}{initial_checks},
		}});
	}

	check_firewall($anvil);

	return(0);
}
|
|
|
|
|
|
|
|
# This handles running tasks that only run on some loops.
|
|
|
|
sub handle_periodic_tasks
|
|
|
|
{
|
|
|
|
my ($anvil) = @_;
|
|
|
|
|
|
|
|
my $now_time = time;
|
|
|
|
my $type = $anvil->Get->host_type();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if not connection was established and the host is a Striker, to load a peer's backup, if it exists, and then start the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
"s1:now_time" => $now_time,
|
|
|
|
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
|
|
|
|
"s3:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
|
|
|
|
"s4:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
|
|
|
|
"s5:type" => $type,
|
|
|
|
}});
|
|
|
|
|
|
|
|
# Time to run once per minute tasks.
|
|
|
|
if ($now_time >= $anvil->data->{timing}{next_minute_check})
|
|
|
|
{
|
|
|
|
# Check the firewall needs to be updated.
|
|
|
|
check_network($anvil);
|
|
|
|
|
|
|
|
# Check to see if the PXE environment needs to be updated.
|
|
|
|
check_install_target($anvil);
|
|
|
|
|
|
|
|
# Check that the users we care about have ssh public keys and they're recorded in ssh_keys.
|
|
|
|
$anvil->System->check_ssh_keys({debug => 3});
|
|
|
|
|
|
|
|
$anvil->System->update_hosts({debug => 3});
|
|
|
|
|
|
|
|
# Check if the files on disk have changed. Even if it is time to check, don't if a job is
|
|
|
|
# running.
|
|
|
|
if ((not $anvil->data->{timing}{jobs_running}) && ($anvil->Storage->check_md5sums))
|
|
|
|
{
|
|
|
|
# NOTE: We exit with '0' to prevent systemctl from showing a scary red message.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Mark that we want to check the database config next time.
|
|
|
|
$check_if_database_is_configured = 1;
|
|
|
|
|
|
|
|
# Update the next check time.
|
|
|
|
$anvil->data->{timing}{next_minute_check} = $now_time + $anvil->data->{timing}{minute_checks};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
|
|
|
|
"s1:timing::minute_checks" => $anvil->data->{timing}{minute_checks},
|
|
|
|
"s2:timing::next_minute_check" => $anvil->data->{timing}{next_minute_check},
|
|
|
|
}});
|
|
|
|
|
|
|
|
# Even when this runs, it should finish in under ten seconds so we don't need to background it.
|
|
|
|
my ($parse_output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{'anvil-parse-fence-agents'}.$anvil->Log->switches, source => $THIS_FILE, line => __LINE__});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { parse_output => $parse_output }});
|
* Moved the fences_unified_metadata file from /tmp, which apache can not read, to /var/www/html/.
* Fixed a bug (well, made a work-around for an issue without a known reproducer) where, on some occassion, a record will end up in the public table without being copied into the history schema. When this happens, the next resync would crash out because the resynd reads in the history table only. Now, when about to INSERT a record into the public schema during a resync, an explicit check is made to see if the record alread
y exists. If it does, the INSERT is instead redirected to the history schema.
* Cleaned up the fence agent metadata when displaying to a user, converting the shell codes to underline a string with square brackets instead. We also now replace newlines with <br /> tags. Lastly, to help fence_azure_arm's metadata description to display cleanly, a check is made to format the table correctly.
* Began work on the Striker menu for handling fence device management
Signed-off-by: Digimer <digimer@alteeve.ca>
5 years ago
|
|
|
|
|
|
|
# Scan the local network.
|
|
|
|
update_state_file($anvil);
|
|
|
|
|
|
|
|
# Make sure the shared directories exist.
|
|
|
|
foreach my $target (sort {$a cmp $b} keys %{$anvil->data->{path}{directories}{shared}})
|
|
|
|
{
|
|
|
|
my $directory = $anvil->data->{path}{directories}{shared}{$target};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
|
|
|
|
target => $target,
|
|
|
|
directory => $directory,
|
|
|
|
}});
|
|
|
|
if (not -e $anvil->data->{path}{directories}{shared}{$target})
|
|
|
|
{
|
|
|
|
my $failed = $anvil->Storage->make_directory({
|
|
|
|
directory => $directory,
|
|
|
|
group => "apache",
|
|
|
|
user => "apache",
|
|
|
|
mode => "0775",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { failed => $failed }});
|
|
|
|
if ($failed)
|
|
|
|
{
|
|
|
|
# Something went wrong.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "log_0254", variables => {
|
|
|
|
directory => $directory,
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
# Success
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0255", variables => {
|
|
|
|
directory => $directory,
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# Check mail server config.
|
|
|
|
my $problem = $anvil->Email->check_config({debug => 3});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }});
|
|
|
|
}
|
|
|
|
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if not connection was established and the host is a Striker, to load a peer's backup, if it exists, and then start the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
# Now check to see if it's time to run less frequent tasks.
|
|
|
|
if ($now_time >= $anvil->data->{timing}{next_ten_minute_check})
|
|
|
|
{
|
|
|
|
my $host_type = $anvil->Get->host_type();
|
|
|
|
my $host_uuid = $anvil->Get->host_uuid();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
host_type => $host_type,
|
|
|
|
host_uuid => $host_uuid,
|
|
|
|
}});
|
|
|
|
|
|
|
|
# Are we a Striker and is there two or more connections? If so, evaluate if we should shut down our
|
|
|
|
# database.
|
|
|
|
if ($host_type eq "striker")
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if no connection was established and the host is a Striker, loads a peer's backup, if it exists, and then starts the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
{
|
|
|
|
if ($anvil->data->{sys}{database}{connections} > 1)
|
|
|
|
{
|
|
|
|
# Sort by UUID, skip the first, and see if we're one of the others.
|
|
|
|
my $first_uuid = "";
|
|
|
|
foreach my $uuid (sort {$a cmp $b} keys %{$anvil->data->{cache}{database_handle}})
|
|
|
|
{
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uuid => $uuid }});
|
|
|
|
if (not $first_uuid)
|
|
|
|
{
|
|
|
|
$first_uuid = $uuid;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { first_uuid => $first_uuid }});
|
|
|
|
}
|
|
|
|
elsif ($uuid eq $host_uuid)
|
|
|
|
{
|
|
|
|
# This is us, backup and shut down.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0659"});
|
|
|
|
|
|
|
|
# Switch the read_uuid and then close
|
|
|
|
$anvil->data->{sys}{database}{read_uuid} = $first_uuid;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "sys::database::read_uuid" => $anvil->data->{sys}{database}{read_uuid} }});
|
|
|
|
|
|
|
|
# Disconnect
|
|
|
|
$anvil->data->{cache}{database_handle}{$uuid}->disconnect;
|
|
|
|
delete $anvil->data->{cache}{database_handle}{$uuid};
|
|
|
|
|
|
|
|
# Create a backup, this is useful also for setting the mtime of the last time
|
|
|
|
# we were up.
|
|
|
|
my $dump_file = $anvil->Database->backup_database({debug => 3});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
|
|
|
|
|
|
|
|
# Stop the daemon
|
|
|
|
my $return_code = $anvil->System->stop_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { return_code => $return_code }});
|
|
|
|
if ($return_code eq "0")
|
|
|
|
{
|
|
|
|
# Stopped the daemon.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0660"});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if no connection was established and the host is a Striker, loads a peer's backup, if it exists, and then starts the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
# If we're the active database, dump out database out and rsync it to our peers.
|
|
|
|
my $peers = keys %{$anvil->data->{database}};
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if no connection was established and the host is a Striker, loads a peer's backup, if it exists, and then starts the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
my $connections = $anvil->data->{sys}{database}{connections};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
peers => $peers,
|
|
|
|
connections => $connections,
|
|
|
|
}});
|
|
|
|
if (exists $anvil->data->{cache}{database_handle}{$host_uuid})
|
|
|
|
{
|
|
|
|
# Verify that the database is up.
|
|
|
|
my $running = $anvil->System->check_daemon({daemon => $anvil->data->{sys}{daemon}{postgresql}});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { running => $running }});
|
|
|
|
if ($running)
|
|
|
|
{
|
|
|
|
# Backup our DB.
|
|
|
|
my $dump_file = $anvil->Database->backup_database({debug => 2});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});
|
|
|
|
|
|
|
|
# Now rsync it to our peer(s)
|
|
|
|
foreach my $this_host_uuid (sort {$a cmp $b} keys %{$anvil->data->{database}})
|
|
|
|
{
|
|
|
|
next if $this_host_uuid eq $host_uuid;
|
|
|
|
|
|
|
|
my $destination = "root\@".$anvil->data->{database}{$this_host_uuid}{host}.":".$anvil->data->{path}{directories}{pgsql}."/";
|
|
|
|
my $password = $anvil->data->{database}{$this_host_uuid}{password};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
this_host_uuid => $this_host_uuid,
|
|
|
|
destination => $destination,
|
|
|
|
password => $anvil->Log->is_secure($password),
|
|
|
|
}});
|
|
|
|
|
|
|
|
my $start_time = time;
|
|
|
|
my $failed = $anvil->Storage->rsync({
|
|
|
|
debug => 3,
|
|
|
|
destination => $destination,
|
|
|
|
password => $password,
|
|
|
|
source => $dump_file,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $failed }});
|
|
|
|
|
|
|
|
my $rsync_time = time - $start_time;
|
|
|
|
my $size = $anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{file_stat}{$dump_file}{size}});
|
|
|
|
my $size_bytes = $anvil->Convert->add_commas({number => $anvil->data->{file_stat}{$dump_file}{size}});
|
|
|
|
my $target_name = $anvil->Get->host_name_from_uuid({debug => 3, host_uuid => $this_host_uuid});
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0658", variables => {
|
|
|
|
file => $dump_file,
|
|
|
|
host_name => $target_name,
|
|
|
|
took => $rsync_time,
|
|
|
|
size => $size,
|
|
|
|
size_bytes => $size_bytes,
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if no connection was established and the host is a Striker, loads a peer's backup, if it exists, and then starts the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
}
|
|
|
|
|
|
|
|
# Update the next check time.
|
|
|
|
$anvil->data->{timing}{next_ten_minute_check} = $now_time + $anvil->data->{timing}{ten_minute_checks};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
"s1:timing::ten_minute_checks" => $anvil->data->{timing}{ten_minute_checks},
|
|
|
|
"s2:timing::next_ten_minute_check" => $anvil->data->{timing}{next_ten_minute_check},
|
|
|
|
}});
|
* WIP: Working on a new method of failing over between which Striker is the active database, instead of running N-number of databases all the time.
* Created Database->backup_database() that creates a pg_dump of the active database.
* Created Database->load_database() that loads the database from a flat file, optionally creating a backup before doing so, and using iptables to block access during the process.
* Updated Database->configure_pgsql() to not start the postgresql daemon unless it just initialized the DB.
* Much work, not yet complete, to Database->connect() to stop after the first successful connection. Added logic that, if no connection was established and the host is a Striker, loads a peer's backup, if it exists, and then starts the local daemon.
* Updated anvil-daemon to now have a section to run tasks on a ten minute cycle, which will later be used for the primary Striker to dump / copy its database to peer(s).
Signed-off-by: Madison Kelly <mkelly@alteeve.ca>
3 years ago
|
|
|
}
|
|
|
|
|
|
|
|
# Now check to see if it's time to run daily tasks.
|
|
|
|
if ($now_time >= $anvil->data->{timing}{next_daily_check})
|
|
|
|
{
|
|
|
|
### NOTE: We call it once/day, but this will also trigger on restart of anvil-daemon. As such, we
|
|
|
|
### don't use '--force' and let striker-manage-install-target skip the repo update if it happened
|
|
|
|
### recently enough.
|
|
|
|
if ($type eq "striker")
|
|
|
|
{
|
|
|
|
# Age out old data. This takes up to a minute.
|
|
|
|
$anvil->Database->_age_out_data();
|
|
|
|
|
|
|
|
# Archive old data
|
|
|
|
$anvil->Database->archive_database();
|
|
|
|
|
|
|
|
# Record a job, don't call it directly. It takes too long to run.
|
|
|
|
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
|
|
|
|
file => $THIS_FILE,
|
|
|
|
line => __LINE__,
|
|
|
|
job_command => $anvil->data->{path}{exe}{'striker-manage-install-target'}." --refresh".$anvil->Log->switches,
|
|
|
|
job_data => "",
|
|
|
|
job_name => "install-target::refresh",
|
|
|
|
job_title => "job_0015",
|
|
|
|
job_description => "job_0017",
|
|
|
|
job_progress => 0,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
|
|
|
|
|
|
|
|
# Update the OUI data.
|
|
|
|
($job_uuid) = $anvil->Database->insert_or_update_jobs({
|
|
|
|
file => $THIS_FILE,
|
|
|
|
line => __LINE__,
|
|
|
|
job_command => $anvil->data->{path}{exe}{'striker-parse-oui'}.$anvil->Log->switches,
|
|
|
|
job_data => "",
|
|
|
|
job_name => "oui-data::refresh",
|
|
|
|
job_title => "job_0064",
|
|
|
|
job_description => "job_0065",
|
|
|
|
job_progress => 0,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
|
|
|
|
|
|
|
|
# Scan the networks
|
|
|
|
($job_uuid) = $anvil->Database->insert_or_update_jobs({
|
|
|
|
file => $THIS_FILE,
|
|
|
|
line => __LINE__,
|
|
|
|
job_command => $anvil->data->{path}{exe}{'striker-scan-network'}.$anvil->Log->switches,
|
|
|
|
job_data => "",
|
|
|
|
job_name => "scan-network::refresh",
|
|
|
|
job_title => "job_0066",
|
|
|
|
job_description => "job_0067",
|
|
|
|
job_progress => 0,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Update the next check time.
|
|
|
|
$anvil->data->{timing}{next_daily_check} = $now_time + $anvil->data->{timing}{daily_checks};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
|
|
|
|
"s1:timing::daily_checks" => $anvil->data->{timing}{daily_checks},
|
|
|
|
"s2:timing::next_daily_check" => $anvil->data->{timing}{next_daily_check},
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
# Asks striker-manage-install-target whether the install target (dhcpd) is running and stores the answer in
# the 'install-target::enabled' variable for this host. The stored value is "enabled", "disabled" or, when no
# usable 'status=<digit>' line was found in the output, "unavailable".
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# Returns '0' in all cases. Hosts that are not Strikers return before anything is called or recorded.
sub check_install_target
{
	my ($anvil) = @_;
	
	# Anything that is not a dashboard has nothing to do here.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { system_type => $host_type }});
	return(0) if $host_type ne "striker";
	
	# Until the tool tells us otherwise, the state is unknown.
	my $state      = "unavailable";
	my $shell_call = $anvil->data->{path}{exe}{'striker-manage-install-target'}." --status --check --no-refresh".$anvil->Log->switches;
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output }});
	
	# Only the first line carrying 'status=<digit>' is used.
	foreach my $line (split(/\n/, $output))
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		next if $line !~ /status=(\d)/;
		
		my $digit = $1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { digit => $digit }});
		
		# 0 is disabled, 1 is enabled, any other digit leaves the state untouched.
		$state = ($digit == 0) ? "disabled"
		       : ($digit == 1) ? "enabled"
		       :                 $state;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { status => $state }});
		last;
	}
	
	# Store what we found.
	$anvil->Database->insert_or_update_variables({
		variable_name         => "install-target::enabled",
		variable_source_uuid  => $anvil->Get->host_uuid,
		variable_source_table => "hosts",
		variable_value        => $state,
		variable_default      => "unavailable",
		variable_description  => "striker_0110",
		variable_section      => "system",
	});
	
	return(0);
}
|
|
|
|
|
* Fixed a bug where setting the debug level to 3 caused a deep recursion and a system hang.
* Update Anvil::Tools->new() to access the parameters 'log_level', 'log_secure' and 'debug', streamlining the frequent calls to $anvil->Log->level and ->secure in program startup, and allowing the values to take effect during the ->new constructor.
* Passed 'debug' to child method calls in more places (still more to do though).
* Fixed a bug where 'test_table' wasn't set in the right place, causing the database to try to initialize repeatedly.
* Made Database->archive_database only run if called with root access.
* Now the number of database connections are stored in 'sys::db_connections' instead of checking the returned number, and that is cleared on disconnect.
* Started working more on 'anvil-daemon', including adding support for System->call taking the 'background', 'stderr_file' and 'stdout_file' parameters which, when set, use Proc::Simple to background the process.
* Did some more work on database archiving, though still far from done.
Signed-off-by: Digimer <digimer@alteeve.ca>
6 years ago
|
|
|
# Start-up tasks. Nothing here needs to run continuously; these are typically run when the server starts up
# or when the daemon is restarted or reloaded.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# Returns '0', unless the 'startup-only' switch is set, in which case nice_exit() is called with exit code '0'
# once the tasks are done.
sub run_once
{
	my ($anvil) = @_;
	
	# The database has to be ready before the other tasks.
	prep_database($anvil);
	
	# Boot-time work; only done if we've just booted.
	boot_time_tasks($anvil);
	
	# NOTE: The ssh key check also happens in the per-minute tasks, but it must happen on boot too.
	$anvil->System->check_ssh_keys();
	
	# setuid wrappers.
	check_setuid_wrappers($anvil);
	
	# journald should be using persistent storage.
	check_journald($anvil);
	
	# When asked to only do the start-up work, we're finished.
	$anvil->nice_exit({exit_code => 0}) if $anvil->data->{switches}{'startup-only'};
	
	return(0);
}
|
|
|
|
|
|
|
|
# Makes sure journald keeps its logs persistently. The journald directory is created if it is missing and,
# unless journald.conf already has a 'Storage=persistent' line, the config is rewritten to have one and
# systemd-journald is restarted.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# Returns '0' in all cases.
sub check_journald
{
	my ($anvil) = @_;
	
	my $config_file = $anvil->data->{path}{configs}{'journald.conf'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::configs::journald.conf' => $config_file }});
	
	# Pass one; see what, if anything, 'Storage=' is currently set to.
	my $persistent_seen   = 0;
	my $change_storage    = 0;
	my $old_journald_conf = $anvil->Storage->read_file({file => $config_file});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_journald_conf => $old_journald_conf }});
	
	my @old_lines = split(/\n/, $old_journald_conf);
	foreach my $line (@old_lines)
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { line => $line }});
		next if $line !~ /^Storage=(.*)$/;
		
		my $value = $1;
		if ($value ne "persistent")
		{
			# Set, but to something else. It will need to be replaced.
			$change_storage = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { change_storage => $change_storage }});
		}
		else
		{
			# (The log key's spelling is left as-is on purpose.)
			$persistent_seen = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { peristent_seen => $persistent_seen }});
		}
	}
	
	# The journal directory has to exist.
	my $journald_directory = $anvil->data->{path}{directories}{journald};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'path::directories::journald' => $journald_directory }});
	if (not -d $journald_directory)
	{
		$anvil->Storage->make_directory({
			debug     => 2,
			directory => $journald_directory,
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0248", variables => { directory => $journald_directory }});
	}
	
	# Already persistent? Then the config is fine as it is.
	return(0) if $persistent_seen;
	
	# Pass two; rebuild the config with exactly one 'Storage=persistent' line.
	my $storage_added = 0;
	my @rebuilt       = ();
	foreach my $line (@old_lines)
	{
		if (($line =~ /^Storage=/) && ($change_storage))
		{
			# Existing (wrong) settings are dropped; the first one is swapped for ours.
			next if $storage_added;
			$storage_added = 1;
			push @rebuilt, "Storage=persistent";
			next;
		}
		if ((not $storage_added) && ($line =~ /^#Storage=/))
		{
			# Put our setting just above the commented-out example, which is kept.
			$storage_added = 1;
			push @rebuilt, "Storage=persistent";
		}
		push @rebuilt, $line;
	}
	
	# Found nowhere to put it, so it goes at the end.
	push @rebuilt, "Storage=persistent" if not $storage_added;
	
	my $new_journald_conf = join("", map { $_."\n" } @rebuilt);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { new_journald_conf => $new_journald_conf }});
	
	$anvil->Storage->write_file({
		debug     => 3,
		secure    => 0,
		file      => $config_file,
		body      => $new_journald_conf,
		mode      => "0644",
		overwrite => 1,
	});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "message_0013", variables => { file => $config_file }});
	
	# The daemon needs a restart to pick up the new config.
	my $shell_call = $anvil->data->{path}{exe}{systemctl}." restart systemd-journald.service";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
	my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $output,
		return_code => $return_code,
	}});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This creates, as needed, the setuid wrappers used by apache to make certain system calls.
# 
# At this time, the only wrapper is 'call_striker-get-peer-data'. If it doesn't exist yet, a small C program is
# written out beside it (with a '.c' suffix) that calls setuid() / setgid() with the 'admin' user's IDs and then
# exec's 'striker-get-peer-data'. That source is compiled with gcc and the resulting binary is set to be owned
# by root:root with mode 4755.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# Returns '0' in all cases; when this isn't a Striker, when the wrapper already exists, when the 'admin' user or
# group can't be resolved, and when writing or compiling fails (the latter two are logged as alerts).
sub check_setuid_wrappers
{
	my ($anvil) = @_;
	
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		# Not a dashboard, setuid scripts aren't needed.
		return(0);
	}
	
	# Does the call_striker-get-peer-data wrapper exist yet?
	my $wrapper     = $anvil->data->{path}{exe}{'call_striker-get-peer-data'};
	my $source_file = $wrapper.".c";
	if (-e $wrapper)
	{
		# Exists, skipping.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0436", variables => { wrapper => $wrapper }});
		return(0);
	}
	
	# What is the admin user and group ID?
	my $admin_uid = getpwnam('admin');
	my $admin_gid = getgrnam('admin');
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		admin_uid => $admin_uid, 
		admin_gid => $admin_gid,
	}});
	
	# Without both IDs, there is nothing sane to write into the wrapper. This used to be 'next', but we're not
	# in a loop here, so that would either be fatal or would skip an iteration of a loop in our caller.
	if ((not $admin_uid) or (not $admin_gid))
	{
		return(0);
	}
	
	# Write the body out
	# NOTE(review): setuid() is called before setgid(). If the setuid() call drops root, the later setgid() is
	#               expected to fail, leaving the caller's group in place - confirm if that is intended.
	# NOTE(review): The C below is K&R style with no '#include's. Newer compilers may refuse implicit 'int' and
	#               implicit function declarations - confirm against the gcc version in use.
	my $call_striker_get_peer_data_body =  "#define REAL_PATH \"".$anvil->data->{path}{exe}{'striker-get-peer-data'}."\"\n";
	   $call_striker_get_peer_data_body .= "main(ac, av)\n";
	   $call_striker_get_peer_data_body .= "char **av;\n";
	   $call_striker_get_peer_data_body .= "{\n";
	   $call_striker_get_peer_data_body .= "    setuid(".$admin_uid.");\n";
	   $call_striker_get_peer_data_body .= "    setgid(".$admin_gid.");\n";
	   $call_striker_get_peer_data_body .= "    execv(REAL_PATH, av);\n";
	   $call_striker_get_peer_data_body .= "}\n";
	my $error = $anvil->Storage->write_file({
		debug     => 3,
		file      => $source_file,
		body      => $call_striker_get_peer_data_body,
		mode      => '644',
		overwrite => 1,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { error => $error }});
	
	# If it wrote out, compile it. The file's existence, not the value returned by write_file(), is what is
	# checked.
	if (not -e $source_file)
	{
		# Failed to write.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0071", variables => { file => $source_file }});
		return(0);
	}
	
	# Compile it
	my ($output, $return_code) = $anvil->System->call({
		debug      => 3, 
		shell_call => $anvil->data->{path}{exe}{gcc}." -o ".$wrapper." ".$source_file,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 
		output      => $output, 
		return_code => $return_code,
	}});
	
	# If it compiled, setuid it. As above, success is judged by the binary existing.
	if (not -e $wrapper)
	{
		# Something went wrong compiling it.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "error_0072", variables => { file => $source_file }});
		return(0);
	}
	
	$anvil->Storage->change_owner({
		debug => 3,
		path  => $wrapper,
		user  => 'root',
		group => 'root',
	});
	$anvil->Storage->change_mode({
		debug => 3,
		path  => $wrapper,
		mode  => '4755',
	});
	
	return(0);
}
|
|
|
|
|
|
|
|
# Runs anvil-manage-firewall to configure / update the firewall, provided this host has been configured.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# Returns '0' in all cases.
sub check_firewall
{
	my ($anvil) = @_;
	
	# There's no point touching the firewall until the host is configured.
	my $is_configured = $anvil->System->check_if_configured({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $is_configured }});
	return(0) if not $is_configured;
	
	# Let anvil-manage-firewall work out whether anything needs to change.
	my $shell_call = $anvil->data->{path}{exe}{'anvil-manage-firewall'}.$anvil->Log->switches;
	my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		output      => $output,
		return_code => $return_code,
	}});
	
	return(0);
}
|
|
|
|
|
|
|
|
# This handles tasks that need to run on boot (if any)
|
|
|
|
sub boot_time_tasks
|
|
|
|
{
|
|
|
|
my ($anvil) = @_;
|
|
|
|
|
|
|
|
# If the uptime is less than ten minutes, clear the reboot flag.
|
|
|
|
my $uptime = $anvil->Get->uptime;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});
|
|
|
|
|
|
|
|
# Now find out if a reboot is listed as needed and when it was last changed.
|
|
|
|
my $reboot_needed = 0;
|
|
|
|
my $changed_seconds_ago = 0;
|
|
|
|
my $query = "
|
|
|
|
SELECT
|
|
|
|
variable_value,
|
|
|
|
(SELECT extract(epoch from now()) - extract(epoch from modified_date)) AS changed_seconds_ago
|
|
|
|
FROM
|
|
|
|
variables
|
|
|
|
WHERE
|
|
|
|
variable_source_table = 'hosts'
|
|
|
|
AND
|
|
|
|
variable_source_uuid = ".$anvil->Database->quote($anvil->Get->host_uuid)."
|
|
|
|
AND
|
|
|
|
variable_name = 'reboot::needed'
|
|
|
|
;";
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0124", variables => { query => $query }});
|
|
|
|
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
|
|
|
|
my $count = @{$results};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
results => $results,
|
|
|
|
count => $count,
|
|
|
|
}});
|
|
|
|
if ($count)
|
|
|
|
{
|
|
|
|
$reboot_needed = $results->[0]->[0];
|
|
|
|
$changed_seconds_ago = $results->[0]->[1];
|
|
|
|
$changed_seconds_ago =~ s/^(\d+)\..*$/$1/;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
reboot_needed => $reboot_needed,
|
|
|
|
changed_seconds_ago => $changed_seconds_ago,
|
|
|
|
}});
|
|
|
|
}
|
|
|
|
|
|
|
|
# If a reboot is needed, see if the uptime is less than the time since the reboot needed flag was
|
|
|
|
# set. If the uptime is less, then the system rebooted since it was requested so clear it. h/t to
|
|
|
|
# Lisa Seelye (@thedoh) for this idea!
|
|
|
|
my $difference = ($changed_seconds_ago - $uptime);
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
"s1:reboot_needed" => $reboot_needed,
|
|
|
|
"s2:changed_seconds_ago" => $changed_seconds_ago,
|
|
|
|
"s3:uptime" => $uptime,
|
|
|
|
"s4:difference" => $difference,
|
|
|
|
}});
|
|
|
|
if ($reboot_needed)
|
|
|
|
{
|
|
|
|
if ($uptime < $changed_seconds_ago)
|
|
|
|
{
|
|
|
|
# Clear the reboot request.
|
|
|
|
$reboot_needed = $anvil->System->reboot_needed({debug => 2, set => 0});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { reboot_needed => $reboot_needed }});
|
|
|
|
|
|
|
|
# Check to see if there was a reboot job in progress. If so, finish it off.
|
|
|
|
my $job_uuid = $anvil->Job->get_job_uuid({
|
|
|
|
debug => 2,
|
|
|
|
program => "anvil-manage-power",
|
|
|
|
incomplete => 1,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
|
|
|
|
|
|
|
|
if ($job_uuid)
|
|
|
|
{
|
|
|
|
# Update the percentage to '100' and then clear the old PID.
|
|
|
|
my $date_time = $anvil->Get->date_and_time();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { date_time => $date_time }});
|
|
|
|
|
|
|
|
$anvil->Job->update_progress({
|
|
|
|
progress => 100,
|
|
|
|
message => "message_0064,!!date_and_time!".$date_time."!!",
|
|
|
|
job_uuid => $job_uuid,
|
|
|
|
picked_up_by => 0,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
# Update our status
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0572"});
|
|
|
|
|
|
|
|
$anvil->Database->get_hosts({debug => 2});
|
|
|
|
my $host_uuid = $anvil->Get->host_uuid({debug => 2});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { host_uuid => $host_uuid }});
|
|
|
|
$anvil->Database->insert_or_update_hosts({
|
|
|
|
debug => 2,
|
|
|
|
host_ipmi => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_ipmi},
|
|
|
|
host_key => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_key},
|
|
|
|
host_name => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name},
|
|
|
|
host_type => $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type},
|
|
|
|
host_uuid => $host_uuid,
|
|
|
|
host_status => "online",
|
|
|
|
});
|
|
|
|
|
|
|
|
# Make sure our stop reason is cleared.
|
|
|
|
my $variable_uuid = $anvil->Database->insert_or_update_variables({
|
|
|
|
variable_name => 'system::stop_reason',
|
|
|
|
variable_value => '',
|
|
|
|
variable_default => '',
|
|
|
|
variable_description => 'striker_0279',
|
|
|
|
variable_section => 'system',
|
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :)
* Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively.
* Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf.
* Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state.
* Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested.
* Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times.
* Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan.
* Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting.
Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
|
|
|
variable_source_uuid => $host_uuid,
|
|
|
|
variable_source_table => 'hosts',
|
|
|
|
});
|
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :)
* Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively.
* Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf.
* Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state.
* Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested.
* Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times.
* Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan.
* Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting.
Signed-off-by: Digimer <digimer@alteeve.ca>
4 years ago
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Make sure /etc/hosts is updated.
|
|
|
|
$anvil->System->update_hosts();
|
|
|
|
|
|
|
|
# This handles weird bits for things like bug work-arounds.
|
|
|
|
handle_special_cases($anvil);
|
|
|
|
|
|
|
|
# Now look for jobs that have a job status of 'anvil_startup'
|
|
|
|
run_jobs($anvil, 1);
|
|
|
|
|
|
|
|
# Check the firewall needs to be updated.
|
|
|
|
check_firewall($anvil);
|
|
|
|
|
|
|
|
# If we're a striker, check apache
|
|
|
|
my $host_type = $anvil->Get->host_type;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
|
|
|
|
if ($host_type eq "striker")
|
|
|
|
{
|
|
|
|
$anvil->Striker->check_httpd_conf({debug => 2});
|
|
|
|
}
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
# This applies one-off work-arounds for known bugs.
#
# Parameters;
#   $anvil - The object used to reach the 'Get', 'Log' and 'Storage' methods called below.
#
# Returns '0' in all cases.
sub handle_special_cases
{
	my ($anvil) = @_;

	# RHBZ #1961562 - https://bugzilla.redhat.com/show_bug.cgi?id=1961562#c16
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});

	# The work-around is not applied on Striker dashboards, so we're done if that is what we are.
	return(0) if $host_type eq "striker";

	# We're a node or DR host, so this file needs to exist (an empty file is enough).
	my $work_around_file = "/etc/qemu/firmware/50-edk2-ovmf-cc.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { work_around_file => $work_around_file }});

	# If the file is already there, leave it alone.
	return(0) if -e $work_around_file;

	$anvil->Storage->write_file({
		debug     => 2,
		file      => $work_around_file,
		body      => "",
		overwrite => 0,
		backup    => 0,
		mode      => "0644",
		user      => "root",
		group     => "root",
	});

	return(0);
}
|
|
|
|
|
|
|
|
# This configures the local database, if needed. Nothing is done if a database dump file is found for any
# entry under 'database' in the loaded data, or if this host is not a Striker dashboard.
#
# Parameters;
#   $anvil - The object used to reach the 'data', 'Get', 'Log' and 'System' methods called below.
#
# Returns '0' in all cases.
sub prep_database
{
	my ($anvil) = @_;

	# If there's a backup (dump) file, we're configured and possibly just off.
	foreach my $uuid (keys %{$anvil->data->{database}})
	{
		my $dump_name =  $anvil->data->{sys}{database}{name}."_db_dump.".$uuid.".sql";
		my $dump_file =  $anvil->data->{path}{directories}{pgsql}."/".$dump_name;
		   $dump_file =~ s{//}{/}g;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dump_file => $dump_file }});

		# No dump file for this database, check the next one.
		next if not -e $dump_file;

		# Found one, so there is no need to prepare.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0665", variables => { file => $dump_file }});
		return(0);
	}

	# Only run this if we're a dashboard.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type ne "striker")
	{
		return(0);
	}

	### NOTE: This failed once, in case / until it happens again, we force log level 2 and secure logging
	###       ('-vv --log-secure') instead of passing along our own '$anvil->Log->switches'.
	my $prep_call = $anvil->data->{path}{exe}{'striker-prep-database'}." -vv --log-secure";
	my ($database_output, $return_code) = $anvil->System->call({
		debug      => 2,
		shell_call => $prep_call,
		source     => $THIS_FILE,
		line       => __LINE__,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		database_output => $database_output,
		return_code     => $return_code,
	}});

	return(0);
}
|
|
|
|
|
|
|
|
# These are the tasks that need to keep running. In order, this;
# - Reaps job handles whose background process has exited (only when 'sys::mapping_network' is not set
#   and 'processes' exists in the loaded data).
# - Either regenerates the state JSON (when configured and not mapping the network) or calls
#   update_state_file().
# - Calls run_jobs() to handle pending jobs, unless 'sys::mapping_network' is set.
#
# Parameters;
#   $anvil - The object used to reach the 'data', 'Job', 'Log' and 'System' methods called below.
#
# Returns '0' in all cases.
sub keep_running
{
	my ($anvil) = @_;

	# Check for jobs that were running and now exited.
	if ((not $anvil->data->{sys}{mapping_network}) && (exists $anvil->data->{processes}))
	{
		# NOTE: We must walk the keys only. Iterating the hash itself returns the handle objects as
		#       well, which then get used (stringified) as job UUIDs and lead to a method call on an
		#       undefined value. 'keys' also snapshots the list, so deleting entries below is safe.
		foreach my $job_uuid (keys %{$anvil->data->{jobs}{handles}})
		{
			# If it's not a handle, delete it.
			my $handle = $anvil->data->{jobs}{handles}{$job_uuid};
			if (not ref($handle))
			{
				delete $anvil->data->{jobs}{handles}{$job_uuid};
				next;
			}

			my $running = $handle->poll();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
				"jobs::handles::${job_uuid}" => $handle,
				running                      => $running,
			}});

			# If it's not running, update the table to clear the 'job_picked_up_by' column.
			if (not $running)
			{
				my $exit_status = $handle->exit_status();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
					job_uuid    => $job_uuid,
					exit_status => $exit_status,
				}});

				# Free up memory
				$handle->cleanup();

				$anvil->Job->clear({job_uuid => $job_uuid});

				# Forget the handle so that this job isn't reaped (and cleared) again on
				# every later pass.
				delete $anvil->data->{jobs}{handles}{$job_uuid};
			}
		}
	}

	# If we're configured, write out the status JSON file. If we're not configured, update hardware state
	# files.
	my $configured = $anvil->System->check_if_configured;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { configured => $configured }});
	if ((not $anvil->data->{sys}{mapping_network}) && ($configured))
	{
		# Write out state information for all known Anvil! systems and the information from
		# unconfigured nodes and DR hosts, using just database data (hence, fast enough to run
		# constantly).
		$anvil->System->generate_state_json({debug => 3});
	}
	else
	{
		# Run this to monitor the network in real time.
		update_state_file($anvil);
	}

	# Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
	run_jobs($anvil, 0) if not $anvil->data->{sys}{mapping_network};

	return(0);
}
|
|
|
|
|
|
|
|
# This walks the jobs returned by 'Database->get_jobs()'. For each job, if 'job_picked_up_by' is set, a check
# is made to see if that PID is still alive. If it isn't, or if 'job_picked_up_by' is not set, and the job is
# not at 100%, the job's command is invoked in the background with '--job-uuid <uuid>' appended. Every job
# seen (other than those skipped during a start-up call) is also written to 'status/jobs.json' under the
# html directory.
#
# Parameters;
#   $anvil   - The object used to reach the 'data', 'Database', 'Job', 'Log', 'Storage', 'System' and
#              'Words' methods called below.
#   $startup - When true, only jobs whose 'job_status' is 'anvil_startup' are processed, and 'ended_within'
#              is passed to get_jobs() as '1' instead of '300' (presumably seconds - confirm against
#              Database->get_jobs). When false, jobs whose status is 'anvil_startup' are listed but not
#              started.
#
# Side effects;
#   - 'sys::jobs_running' is reset to '0', then set to '1' if any processed job is not at 100%.
#   - 'jobs::handles::<job_uuid>' and 'jobs::<job_uuid>::started' are set for each job started.
#   - 'lost_job_count::<job_uuid>' tracks how often an unfinished job's PID was found to be gone.
#
# Returns '0' in all cases.
sub run_jobs
{
	my ($anvil, $startup) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { startup => $startup }});

	# This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's
	# changed on disk.
	$anvil->data->{sys}{jobs_running} = 0;

	# We'll also update the jobs.json file.
	my $jobs_file = "{\"jobs\":[\n";

	# Get a list of pending or incomplete jobs.
	my $ended_within = $startup ? 1 : 300;
	my $return       = $anvil->Database->get_jobs({ended_within => $ended_within});
	my $count        = @{$return};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		ended_within => $ended_within,
		'return'     => $return,
		count        => $count,
	}});
	foreach my $hash_ref (@{$return})
	{
		my $job_uuid            = $hash_ref->{job_uuid};
		my $job_command         = $hash_ref->{job_command};
		my $job_data            = $hash_ref->{job_data};
		my $job_picked_up_by    = $hash_ref->{job_picked_up_by};
		my $job_picked_up_at    = $hash_ref->{job_picked_up_at};
		my $job_updated         = $hash_ref->{job_updated};
		my $job_name            = $hash_ref->{job_name};
		my $job_progress        = $hash_ref->{job_progress};
		my $job_title           = $hash_ref->{job_title};
		my $job_description     = $hash_ref->{job_description};
		my $job_status          = $hash_ref->{job_status};
		my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
		my $updated_seconds_ago = $job_updated      ? (time - $job_updated)      : 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			job_uuid            => $job_uuid,
			job_command         => $job_command,
			job_data            => $job_data,
			job_picked_up_by    => $job_picked_up_by,
			job_picked_up_at    => $job_picked_up_at,
			job_updated         => $job_updated,
			job_name            => $job_name,
			job_progress        => $job_progress,
			job_title           => $job_title,
			job_description     => $job_description,
			job_status          => $job_status,
			started_seconds_ago => $started_seconds_ago,
			updated_seconds_ago => $updated_seconds_ago,
		}});

		# If this is a start-up call, only start jobs whose status is 'anvil_startup'.
		if (($startup) && ($job_status ne "anvil_startup"))
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => {
				job_uuid    => $job_uuid,
				job_command => $job_command,
			}});
			next;
		}

		if ($job_progress ne "100")
		{
			$anvil->data->{sys}{jobs_running} = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }});
		}

		# See if the job was picked up by a now-dead instance.
		if ($job_picked_up_by)
		{
			# Check if the PID is still active.
			$anvil->System->pids({ignore_me => 1});

			### TODO: Add a check to verify the job isn't hung.
			# Skip if this job is in progress.
			if (not exists $anvil->data->{pids}{$job_picked_up_by})
			{
				# If the job is done, just clear the 'job_picked_up_by' and be done.
				if ($job_progress ne "100")
				{
					# It's possible that the job updated to 100% and exited after we
					# gathered the job data, so we only alert and clear the job once we've
					# seen it not running and not at 100% on more than five passes.
					# NOTE(review): 'job_picked_up_by' is zeroed below on every pass, so
					#               the job can still be started again right away (subject
					#               to the 60 second limit further down). Confirm whether
					#               the restart itself was meant to wait as well.
					if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid}))
					{
						$anvil->data->{lost_job_count}{$job_uuid} = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
					}
					if ($anvil->data->{lost_job_count}{$job_uuid} > 5)
					{
						# The previous job is gone, but the job isn't finished. Start it again.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0007", variables => {
							command => $job_command,
							pid     => $job_picked_up_by,
							percent => $job_progress,
						}});

						# Clear some variables.
						$job_progress = 0;
						$job_status   = "message_0056";
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
							job_progress => $job_progress,
							job_status   => $job_status,
						}});

						# Clear the job.
						$anvil->Job->clear({debug => 2, job_uuid => $job_uuid});
						$anvil->data->{lost_job_count}{$job_uuid} = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
					}
					else
					{
						$anvil->data->{lost_job_count}{$job_uuid}++;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
					}
				}

				# Clear the PID
				$job_picked_up_by = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }});
			}
		}

		# Convert the double-banged strings into a proper message.
		my $say_title       = $job_title       ? $anvil->Words->parse_banged_string({key_string => $job_title})       : "";
		my $say_description = $job_description ? $anvil->Words->parse_banged_string({key_string => $job_description}) : "";
		my $say_status      = $job_status      ? $anvil->Words->parse_banged_string({key_string => $job_status})      : "";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			job_title       => $job_title,
			say_description => $say_description,
			say_status      => $say_status,
		}});

		# Make the status HTML friendly. Strip any embedded HTML then encode the text string.
		if ($say_status)
		{
			my $html_strip = HTML::Strip->new();
			   $say_status = $html_strip->parse($say_status);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});

			# Now make the resulting text string HTML friendly
			my $text_to_html = HTML::FromText->new({
				urls  => 1,
				email => 1,
				lines => 1,
			});
			$say_status = $text_to_html->parse($say_status);
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});
		}

		# Add this to the jobs.json file
		my $json_string = to_json({
			job_uuid            => $job_uuid,
			job_command         => $job_command,
			job_data            => $job_data,
			job_picked_up_at    => $job_picked_up_at,
			job_updated         => $job_updated,
			job_name            => $job_name,
			job_progress        => $job_progress,
			job_title           => $say_title,
			job_description     => $say_description,
			job_status          => $say_status,
			started_seconds_ago => $started_seconds_ago,
			updated_seconds_ago => $updated_seconds_ago,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { json_string => $json_string }});
		$jobs_file .= $json_string.",\n";

		# If the job is done, move on.
		next if $job_progress eq "100";
		next if $anvil->data->{switches}{'no-start'};

		# Jobs flagged 'anvil_startup' are only started by a start-up call. We test the raw
		# 'job_status' here (as the start-up check above does), not the translated and HTML-encoded
		# '$say_status', which is display text and not guaranteed to still match the flag.
		if ((not $startup) && (defined $job_status) && ($job_status eq "anvil_startup"))
		{
			# Skip this, it will run next time anvil-daemon restarts.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => {
				command  => $job_command,
				job_uuid => $job_uuid,
			}});
			next;
		}

		# If the job is not running, start it.
		if (not $job_picked_up_by)
		{
			my $command = $job_command." --job-uuid ".$job_uuid;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }});

			# Have we started this job recently?
			if (exists $anvil->data->{jobs}{$job_uuid}{started})
			{
				my $last_start = time - $anvil->data->{jobs}{$job_uuid}{started};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { last_start => $last_start }});

				if ($last_start < 60)
				{
					# Skip, Started too recently.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0578", variables => {
						command    => $command,
						last_start => $last_start,
					}});
					next;
				}
			}

			# Start the job, appending '--job-uuid' to the command.
			($anvil->data->{jobs}{handles}{$job_uuid}, my $return_code) = $anvil->System->call({
				background  => 1,
				stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout",
				stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr",
				shell_call  => $command,
				source      => $THIS_FILE,
				line        => __LINE__,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
				return_code                  => $return_code,
			}});

			# Log the PID (the job should update the database).
			my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});

			# Record that we've tried to start this job, so that we don't try to restart it for
			# any reason for at least a minute.
			$anvil->data->{jobs}{$job_uuid}{started} = time;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::${job_uuid}::started" => $anvil->data->{jobs}{$job_uuid}{started} }});
		}
	}

	# Close the jobs file. The trailing comma left by the last entry is removed first.
	$jobs_file =~ s/,\n$/\n/ms;
	$jobs_file .= "]}\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { jobs_file => $jobs_file }});

	# Write the JSON file
	my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_json => $output_json }});
	$anvil->Storage->write_file({
		file      => $output_json,
		body      => $jobs_file,
		overwrite => 1,
		backup    => 0,
		mode      => "0644",
		user      => "apache",
		group     => "apache",
	});

	return(0);
}
|
|
|
|
|
|
|
|
# This runs the 'anvil-update-states' program and logs its output and return code. Per the original notes,
# that program scans the local machine's state (hardware and software) and records it.
#
# Parameters;
#   $anvil - The object used to reach the 'data', 'Log' and 'System' methods called below.
#
# Returns '0' in all cases.
sub update_state_file
{
	my ($anvil) = @_;

	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0480"});

	# NOTE: The program is deliberately called without our logging switches ('$anvil->Log->switches').
	my $program = $anvil->data->{path}{exe}{'anvil-update-states'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $program }});

	my ($output, $exit_code) = $anvil->System->call({
		debug      => 3,
		shell_call => $program,
		source     => $THIS_FILE,
		line       => __LINE__,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
		states_output => $output,
		return_code   => $exit_code,
	}});

	return(0);
}
|