You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
416 lines
15 KiB
416 lines
15 KiB
#!/usr/bin/perl |
|
#
# This is the master daemon that manages all periodically run processes on Striker dashboards and Anvil!
# nodes.
#
# Exit codes;
# 0 = Normal exit
# 1 = md5sum of this program changed. Exited to reload.
# 2 = Unable to connect to any database, even after trying to initialize the local system.
#
# TODO:
# - Need to check what kind of machine this is and not prep the database unless it's a dashboard.
#
|
|
|
use strict; |
|
use warnings; |
|
use Anvil::Tools; |
|
use Proc::Simple; |
|
use JSON; |
|
use HTML::Strip; |
|
use HTML::FromText; |
|
|
|
# Work out this program's name and the directory it was run from. When '$0' has no directory component (for
# example, when run as 'perl anvil-daemon'), neither regex matches, so fall back to '$0' itself and to the
# current directory rather than leaving the values undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE    = $0 if not defined $THIS_FILE;

# The program name is quoted so that any regex metacharacters in it are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory    = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off output buffering.
$| = 1;

my $anvil = Anvil::Tools->new({log_level => 2, log_secure => 1});

# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# If there are no database connections, try to prepare the local database and connect once more. If that
# also fails, exit with code '2'.
if (not $anvil->data->{sys}{database}{connections})
{
	# Try to configure the local database, and then try to connect again.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0201"});
	prep_database($anvil);
	sleep 1;

	# Try connecting again
	$anvil->Database->connect();
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
	if (not $anvil->data->{sys}{database}{connections})
	{
		# Still nothing, report the error and exit.
		print $anvil->Words->string({key => "error_0003"})."\n";
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0003"});
		$anvil->nice_exit({exit_code => 2});
	}
}

# Read switches
$anvil->data->{switches}{'run-once'}       = "";
$anvil->data->{switches}{'main-loop-only'} = "";
$anvil->Get->switches;

# There are some things we only want to run on (re)start and don't need to always run.
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};

# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;

# Disconnect. We'll reconnect inside the loop
$anvil->Database->disconnect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0203"});

# NOTE(review): Every nice_exit() call below passes 'exit_code', matching the call used above when no
#               database is available. The calls previously used a mix of 'exit_code' and 'code'; confirm
#               that 'exit_code' is the parameter Anvil::Tools->nice_exit() actually reads.

# These are the things we always want running.
while(1)
{
	# Re-read the configuration and connect to the database(s).
	$anvil->Storage->read_config({file => "/etc/anvil/anvil.conf"});
	$anvil->Database->connect;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

	if ($anvil->data->{sys}{database}{connections})
	{
		# Run the recurring tasks for this pass.
		keep_running($anvil);
	}
	else
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "log_0202"});
	}

	# Exit after a single pass if called with '--run-once'. This is checked exactly once per pass so that
	# the log entry is always written before exiting.
	if ($anvil->data->{switches}{'run-once'})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "warn", key => "message_0055"});
		$anvil->nice_exit({exit_code => 0});
	}

	# Has the file on disk changed? If so, exit with code '1' so that we get reloaded.
	if ($anvil->Storage->check_md5sums)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "warn", key => "message_0014"});
		$anvil->nice_exit({exit_code => 1});
	}

	# Sleep now.
	sleep 2;
}

# The loop above has no 'last', so this is only a safety net.
$anvil->nice_exit({exit_code => 0});
|
|
|
############################################################################################################# |
|
# Functions # |
|
############################################################################################################# |
|
|
|
# Start-up tasks. This is called once from the main program (unless '--main-loop-only' was used) rather than
# on every pass of the main loop. It prepares the database and, if the machine booted less than ten minutes
# ago, calls 'anvil-clear-reboot'.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub run_once
{
	my ($anvil) = @_;

	# Make sure the database is ready before anything else.
	prep_database($anvil);

	# Read the uptime file fresh from disk, bypassing any cached copy.
	my $uptime_file = $anvil->data->{path}{proc}{uptime};
	my $seconds_up  = $anvil->Storage->read_file({
		debug      => 2,
		force_read => 1,
		cache      => 0,
		file       => $uptime_file,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $seconds_up }});

	# The file is expected to hold two decimal numbers; the uptime in seconds, then the total idle time.
	# Keep only the whole-second part of the first number.
	$seconds_up =~ s/^(\d+)\..*$/$1/;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $seconds_up }});

	# Nothing more to do unless we booted within the last ten minutes.
	return(0) if not ($seconds_up < 600);

	# Recently booted, so clear the reboot request.
	my $clear_output = $anvil->System->call({
		debug      => 2,
		shell_call => $anvil->data->{path}{exe}{'anvil-clear-reboot'},
		source     => $THIS_FILE,
		line       => __LINE__,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $clear_output }});

	return(0);
}
|
|
|
# This runs the 'anvil-prep-database' program and logs whatever it printed, if anything.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub prep_database
{
	my ($anvil) = @_;

	my $prep_command = $anvil->data->{path}{exe}{'anvil-prep-database'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $prep_command }});

	my $prep_output = $anvil->System->call({
		shell_call => $prep_command,
		source     => $THIS_FILE,
		line       => __LINE__,
	});

	# Only log the output when there is some.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { database_output => $prep_output }}) if $prep_output;

	return(0);
}
|
|
|
# These are the tasks that run on every pass of the main loop. In order, this;
# 1. Polls the handles of jobs started earlier (stored under 'jobs::handles::<job_uuid>' by run_jobs()). For
#    each job that has exited, its handle is cleaned up and forgotten, and the job's 'job_picked_up_by' is
#    cleared in the database.
# 2. Updates the hardware state file.
# 3. Starts any pending jobs.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub keep_running
{
	my ($anvil) = @_;

	# Check for jobs that were running and have now exited. The handles live under 'jobs::handles', so that
	# is what must exist for there to be anything to check.
	if ((exists $anvil->data->{jobs}) && (ref($anvil->data->{jobs}{handles}) eq "HASH"))
	{
		# Walk the keys only (the job UUIDs). The key list is built before the loop starts, so deleting
		# entries inside the loop is safe.
		foreach my $job_uuid (sort keys %{$anvil->data->{jobs}{handles}})
		{
			# If it's not a usable handle, delete it.
			my $handle = $anvil->data->{jobs}{handles}{$job_uuid};
			if (not ((ref($handle)) && (eval { $handle->can("poll") })))
			{
				delete $anvil->data->{jobs}{handles}{$job_uuid};
				next;
			}

			my $running = $handle->poll();
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
				"jobs::handles::${job_uuid}" => $handle,
				running                      => $running,
			}});

			# If it's not running, update the table to clear the 'job_picked_up_by' column.
			if (not $running)
			{
				my $exit_status = $handle->exit_status();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { exit_status => $exit_status }});

				# Free up memory, and forget the handle so that the job is not polled (and cleared)
				# again on every later pass.
				$handle->cleanup();
				delete $anvil->data->{jobs}{handles}{$job_uuid};

				clear_job($anvil, $job_uuid);
			}
		}
	}

	# Update hardware state files.
	update_state_file($anvil);

	# Run any pending jobs as background processes.
	run_jobs($anvil);

	return(0);
}
|
|
|
# This resets a job's 'job_picked_up_by' column to '0' and updates its 'modified_date' to the current
# 'sys::database::timestamp'.
#
# Parameters;
# - $anvil:    The Anvil::Tools object.
# - $job_uuid: The 'job_uuid' of the job to update.
#
# Returns '0'.
sub clear_job
{
	my ($anvil, $job_uuid) = @_;

	# Both values are quoted by the active database handle before being placed in the query.
	my $db_handle   = $anvil->data->{sys}{database}{use_handle};
	my $quoted_date = $db_handle->quote($anvil->data->{sys}{database}{timestamp});
	my $quoted_uuid = $db_handle->quote($job_uuid);

	my $query = "
UPDATE 
    jobs 
SET 
    job_picked_up_by = '0', 
    modified_date = ".$quoted_date." 
WHERE 
    job_uuid = ".$quoted_uuid." 
";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});

	$anvil->Database->write({
		query  => $query,
		source => $THIS_FILE,
		line   => __LINE__,
	});

	return(0);
}
|
|
|
# This reads the pending, running and recently ended (within 300 seconds) jobs from the database and, for
# each one;
# - If the job is not finished and 'job_picked_up_by' holds a PID that is no longer running, the job is
#   cleared (see clear_job()) so that it can be started again.
# - A JSON object describing the job is built, with the title, description and status translated into
#   display strings.
# - If the job is not finished and nothing has picked it up, its command is started in the background with
#   '--job-uuid <job_uuid>' appended. The handle is stored as 'jobs::handles::<job_uuid>'.
# When all jobs have been processed, the collected JSON objects are written to '<html>/status/jobs.json'.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub run_jobs
{
	my ($anvil) = @_;

	# Each job's JSON object is collected here. They're joined with commas when the file body is built, so
	# that the 'jobs' array is valid JSON regardless of how many jobs there are.
	my @json_entries = ();

	# Get a list of pending or incomplete jobs. If anything other than an array reference comes back, treat
	# it as "no jobs" instead of dying on the dereference.
	my $return = $anvil->Database->get_jobs({debug => 2, ended_within => 300});
	   $return = [] if ref($return) ne "ARRAY";
	my $count  = @{$return};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		'return' => $return,
		count    => $count,
	}});
	foreach my $hash_ref (@{$return})
	{
		my $job_uuid            = $hash_ref->{job_uuid};
		my $job_command         = $hash_ref->{job_command};
		my $job_data            = $hash_ref->{job_data};
		my $job_picked_up_by    = $hash_ref->{job_picked_up_by};
		my $job_picked_up_at    = $hash_ref->{job_picked_up_at};
		my $job_updated         = $hash_ref->{job_updated};
		my $job_name            = $hash_ref->{job_name};
		my $job_progress        = $hash_ref->{job_progress};
		my $job_title           = $hash_ref->{job_title};
		my $job_description     = $hash_ref->{job_description};
		my $job_status          = $hash_ref->{job_status};
		my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
		my $updated_seconds_ago = $job_updated      ? (time - $job_updated)      : 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			job_uuid            => $job_uuid,
			job_command         => $job_command,
			job_data            => $job_data,
			job_picked_up_by    => $job_picked_up_by,
			job_picked_up_at    => $job_picked_up_at,
			job_updated         => $job_updated,
			job_name            => $job_name,
			job_progress        => $job_progress,
			job_title           => $job_title,
			job_description     => $job_description,
			job_status          => $job_status,
			started_seconds_ago => $started_seconds_ago,
			updated_seconds_ago => $updated_seconds_ago,
		}});

		# Note whether the job is finished before anything below can reset the progress.
		my $job_done = ((defined $job_progress) && ($job_progress eq "100")) ? 1 : 0;

		# See if the job was picked up by a now-dead instance. Finished jobs are skipped; their process
		# is expected to be gone, and resetting them would cause a completed job to be run again.
		if (($job_picked_up_by) && (not $job_done))
		{
			# Check if the PID is still active.
			$anvil->System->pids({ignore_me => 1});

			### TODO: Add a check to verify the job isn't hung.
			# Skip if this job is in progress.
			if (not exists $anvil->data->{pids}{$job_picked_up_by})
			{
				# The previous job is gone, but the job isn't finished. Start it again.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "striker_warning_0007", variables => {
					command => $job_command,
					pid     => $job_picked_up_by,
					percent => $job_progress,
				}});
				clear_job($anvil, $job_uuid);

				# Clear some variables.
				$job_picked_up_by = 0;
				$job_progress     = 0;
				$job_status       = "message_0056";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					job_picked_up_by => $job_picked_up_by,
					job_progress     => $job_progress,
					job_status       => $job_status,
				}});
			}
		}

		# Convert the double-banged strings into a proper message. Each string is translated only if it
		# is set; the status is tested on its own value so that it is still shown when progress is '0'.
		my $say_title       = $job_title       ? $anvil->Words->parse_banged_string({key_string => $job_title})       : "";
		my $say_description = $job_description ? $anvil->Words->parse_banged_string({key_string => $job_description}) : "";
		my $say_status      = $job_status      ? $anvil->Words->parse_banged_string({key_string => $job_status})      : "";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
			say_title       => $say_title,
			say_description => $say_description,
			say_status      => $say_status,
		}});

		# Make the status HTML friendly. Strip any embedded HTML then encode the text string.
		my $html_strip = HTML::Strip->new();
		$say_status    = $html_strip->parse($say_status);
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});

		# Now make the resulting text string HTML friendly
		my $text_to_html = HTML::FromText->new({
			urls  => 1,
			email => 1,
			lines => 1,
		});
		$say_status = $text_to_html->parse($say_status);
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { say_status => $say_status }});

		# Add this to the jobs.json file
		my $json_string = to_json({
			job_uuid            => $job_uuid,
			job_command         => $job_command,
			job_data            => $job_data,
			job_picked_up_at    => $job_picked_up_at,
			job_updated         => $job_updated,
			job_name            => $job_name,
			job_progress        => $job_progress,
			job_title           => $say_title,
			job_description     => $say_description,
			job_status          => $say_status,
			started_seconds_ago => $started_seconds_ago,
			updated_seconds_ago => $updated_seconds_ago,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { json_string => $json_string }});
		push @json_entries, $json_string;

		# If the job is done, move on.
		next if $job_done;

		# If the job is not running, start it.
		if (not $job_picked_up_by)
		{
			# Start the job, appending '--job-uuid' to the command.
			my $handle = $anvil->System->call({
				debug       => 2,
				background  => 1,
				stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout",
				stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr",
				shell_call  => $job_command." --job-uuid ".$job_uuid,
				source      => $THIS_FILE,
				line        => __LINE__,
			});
			$anvil->data->{jobs}{handles}{$job_uuid} = $handle;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::handles::${job_uuid}" => $handle }});

			# Log the PID (the job should update the database). This is skipped if the call didn't
			# give us a process handle, rather than dying on the method call.
			if ((ref($handle)) && (eval { $handle->can("pid") }))
			{
				my $pid = $handle->pid();
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
			}
		}
	}

	# Build the jobs file body; one JSON object per line, comma-separated, inside the 'jobs' array.
	my $jobs_file =  "{\"jobs\":[\n";
	   $jobs_file .= join(",\n", @json_entries)."\n" if @json_entries;
	   $jobs_file .= "]}\n";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { jobs_file => $jobs_file }});

	# Write the JSON file
	my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_json => $output_json }});
	$anvil->Storage->write_file({
		file      => $output_json,
		body      => $jobs_file,
		overwrite => 1,
		mode      => "0644",
		user      => "apache",
		group     => "apache"
	});

	return(0);
}
|
|
|
# This runs the 'anvil-update-states' program, which scans the local machine's state (hardware and software)
# and records it. Any output from the program is logged.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub update_state_file
{
	my ($anvil) = @_;

	my $update_command = $anvil->data->{path}{exe}{'anvil-update-states'};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $update_command }});

	my $update_output = $anvil->System->call({
		shell_call => $update_command,
		source     => $THIS_FILE,
		line       => __LINE__,
	});

	# Only log the output when there is some.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { states_output => $update_output }}) if $update_output;

	return(0);
}
|
|
|