#!/usr/bin/perl
#
# This is the master daemon that manages all periodically run processes on Striker dashboards and Anvil!
# nodes.
#
# Exit codes;
# 0 = Normal exit
# 1 = md5sum of this program changed. Exited to reload.
# 2 = Unable to connect to any database, even after trying to initialize the local system.
#
# TODO:
# - Need to check what kind of machine this is and not prep the database unless it's a dashboard.
# - Add a "running: pending,yes,done,dead" and show an appropriate icon beside jobs
#

use strict;
use warnings;
use Anvil::Tools;
use Proc::Simple;
use JSON;
use HTML::Strip;
use HTML::FromText;

# Work out this program's file name and the directory it was invoked from.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD})) {
    $running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new({log_level => 2, log_secure => 1});

# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# If there are no database connections, try to configure the local database and connect once more. If that
# also fails, exit with code 2 (the header notes that the caller is expected to restart us).
if (not $anvil->data->{sys}{database}{connections}) {
    # Try to configure the local database, and then try to connect again.
    $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, secure => 0, key => "log_0201"});
    prep_database($anvil);
    sleep 1;

    # Try connecting again
    $anvil->Database->connect();
    $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});
    if (not $anvil->data->{sys}{database}{connections}) {
        # Still nothing, report it and exit.
        print $anvil->Words->string({key => "error_0003"})."\n";
        $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "error_0003"});
        $anvil->nice_exit({exit_code => 2});
    }
}

# Read switches
$anvil->data->{switches}{'refresh-json'}   = "";
$anvil->data->{switches}{'run-once'}       = 0;
$anvil->data->{switches}{'main-loop-only'} = 0;
$anvil->data->{switches}{'no-start'}       = 0;
$anvil->Get->switches;

# '--refresh-json' is shorthand for a single pass of the main loop that starts no jobs.
if ($anvil->data->{switches}{'refresh-json'}) {
    $anvil->data->{switches}{'run-once'}       = 1;
    $anvil->data->{switches}{'main-loop-only'} = 1;
    $anvil->data->{switches}{'no-start'}       = 1;
}

# There are some things we only want to run on (re)start and don't need to always run.
run_once($anvil) if not $anvil->data->{switches}{'main-loop-only'};

# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;

# Disconnect. We'll reconnect inside the loop
$anvil->Database->disconnect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0203"});

# This will prevent restarting while jobs are running.
$anvil->data->{sys}{jobs_running} = 0;

# These are the things we always want running.
while (1) {
    # Re-read the config and connect to the database(s)
    $anvil->Storage->read_config({file => "/etc/anvil/anvil.conf"});
    $anvil->Database->connect({debug => 2});
    $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

    if ($anvil->data->{sys}{database}{connections}) {
        # Do the periodic work; we sleep for 2s at the bottom of the loop.
        keep_running($anvil);
    }
    else {
        $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, secure => 0, key => "log_0202"});
    }

    # Exit if called with '--run-once'. This used to exist twice (before and after the md5sum check), which
    # made the second copy, the one that logged 'message_0055', unreachable. They are merged here, at the
    # position that was actually taken, so the exit code for '--run-once' is still always 0.
    if ($anvil->data->{switches}{'run-once'}) {
        $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "warn", key => "message_0055"});
        $anvil->nice_exit({exit_code => 0});
    }

    # Has the file on disk changed? Only exit to reload when no jobs are in flight.
    # NOTE(review): 'exit_code' is used (instead of the former 'code') to match the call made when no
    # database is available and the exit codes documented in the header; confirm against Anvil::Tools.
    if ((not $anvil->data->{sys}{jobs_running}) && ($anvil->Storage->check_md5sums)) {
        $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "warn", key => "message_0014"});
        $anvil->nice_exit({exit_code => 1});
    }

    # Disconnect from the database(s) and sleep now.
    $anvil->Database->disconnect({debug => 2});
    sleep 2;
}

# Not reached while the loop above is infinite; kept as a safety net.
$anvil->nice_exit({exit_code => 0});


#############################################################################################################
# Functions                                                                                                 #
#############################################################################################################

# These are tools that don't need to constantly run. They'll typically run when the server starts up or the
# daemon is restarted or reloaded.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub run_once {
    my ($anvil) = @_;

    # Check that the database is ready.
    prep_database($anvil);

    # If the uptime is less than ten minutes, clear the reboot flag.
    my $uptime = $anvil->Storage->read_file({
        debug      => 2,
        force_read => 1,
        cache      => 0,
        file       => $anvil->data->{path}{proc}{uptime},
    });
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime }});

    # Clean it up. We'll have gotten two numbers, the uptime in seconds (to two decimal places) and the
    # total idle time. We only care about the int number. If the read failed or returned something
    # unexpected, the value is left empty so that we don't clear the reboot flag by accident.
    my $uptime_seconds = "";
    if ((defined $uptime) && ($uptime =~ /^\s*(\d+)/)) {
        $uptime_seconds = $1;
    }
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { uptime => $uptime_seconds }});

    if (($uptime_seconds ne "") && ($uptime_seconds < 600)) {
        # Clear the reboot request.
        my $output = $anvil->System->call({
            debug      => 2,
            shell_call => $anvil->data->{path}{exe}{'anvil-reboot-needed'}." --reboot-needed 0",
            source     => $THIS_FILE,
            line       => __LINE__,
        });
        $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output }});
    }

    return(0);
}

# Configure the local database, if needed, by calling the 'anvil-prep-database' program. Any output from
# the call is logged.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub prep_database {
    my ($anvil) = @_;

    my $shell_call = $anvil->data->{path}{exe}{'anvil-prep-database'};
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});

    my $database_output = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
    if ($database_output) {
        $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { database_output => $database_output }});
    }

    return(0);
}

# These are tools that need to keep running. This reaps background jobs that have exited, refreshes the
# state file and then starts any pending jobs.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub keep_running {
    my ($anvil) = @_;

    # Check for jobs that were running and now exited. The handles are stored by 'run_jobs' under
    # 'jobs::handles'. (This used to test for 'processes', which nothing in this file sets, and walked the
    # hash without 'keys', which would have used the handle objects themselves as job UUIDs.)
    if (ref($anvil->data->{jobs}{handles}) eq "HASH") {
        foreach my $job_uuid (sort keys %{$anvil->data->{jobs}{handles}}) {
            # If it's not a handle, delete it.
            my $handle = $anvil->data->{jobs}{handles}{$job_uuid};
            if (not (ref($handle) && eval { $handle->can('poll') })) {
                delete $anvil->data->{jobs}{handles}{$job_uuid};
                next;
            }

            my $running = $handle->poll();
            $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
                "jobs::handles::${job_uuid}" => $handle,
                running                      => $running,
            }});

            # If it's not running, update the table to clear the 'job_picked_up_by' column.
            if (not $running) {
                my $exit_status = $handle->exit_status();
                $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { exit_status => $exit_status }});

                # Free up memory, and forget the handle so that this job isn't polled and cleared
                # again on every later pass.
                $handle->cleanup();
                delete $anvil->data->{jobs}{handles}{$job_uuid};
                clear_job($anvil, $job_uuid);
            }
        }
    }

    # Update hardware state files.
    update_state_file($anvil);

    # Run any pending jobs by calling 'anvil-jobs' with the 'job_uuid' as a background process.
    run_jobs($anvil);

    return(0);
}

# This clears the 'job_picked_up_by' column for the given job.
#
# Parameters;
# - $anvil:    The Anvil::Tools object.
# - $job_uuid: The 'job_uuid' of the record in the 'jobs' table to update.
#
# Returns '0'.
sub clear_job {
    my ($anvil, $job_uuid) = @_;

    # Both values are passed through the database handle's 'quote' method before being added to the query.
    my $query = " UPDATE jobs SET job_picked_up_by = '0', modified_date = "
        .$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{database}{timestamp})
        ." WHERE job_uuid = "
        .$anvil->data->{sys}{database}{use_handle}->quote($job_uuid)
        ." ";
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
    $anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__});

    return(0);
}

# This will check for any jobs that aren't at 100%. For each found, if 'picked_up_by' is set, a check is made
# to see if the PID is still alive. If it isn't, or if 'picked_up_by' is not set, the appropriate tool is
# invoked to handle it. The list of jobs is also written out to 'status/jobs.json' under the html directory.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub run_jobs {
    my ($anvil) = @_;

    # This will be set to 1 if any jobs are not complete, preventing a restart of the daemon if it's
    # changed on disk.
    $anvil->data->{sys}{jobs_running} = 0;

    # We'll also update the jobs.json file.
    my $jobs_file = "{\"jobs\":[\n";

    # Get a list of pending or incomplete jobs.
    my $return = $anvil->Database->get_jobs({debug => 2, ended_within => 300});
    my $count  = @{$return};
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
        'return' => $return,
        count    => $count,
    }});

    foreach my $hash_ref (@{$return}) {
        my $job_uuid            = $hash_ref->{job_uuid};
        my $job_command         = $hash_ref->{job_command};
        my $job_data            = $hash_ref->{job_data};
        my $job_picked_up_by    = $hash_ref->{job_picked_up_by};
        my $job_picked_up_at    = $hash_ref->{job_picked_up_at};
        my $job_updated         = $hash_ref->{job_updated};
        my $job_name            = $hash_ref->{job_name};
        my $job_progress        = $hash_ref->{job_progress};
        my $job_title           = $hash_ref->{job_title};
        my $job_description     = $hash_ref->{job_description};
        my $job_status          = $hash_ref->{job_status};
        my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
        my $updated_seconds_ago = $job_updated      ? (time - $job_updated)      : 0;
        $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
            job_uuid            => $job_uuid,
            job_command         => $job_command,
            job_data            => $job_data,
            job_picked_up_by    => $job_picked_up_by,
            job_picked_up_at    => $job_picked_up_at,
            job_updated         => $job_updated,
            job_name            => $job_name,
            job_progress        => $job_progress,
            job_title           => $job_title,
            job_description     => $job_description,
            job_status          => $job_status,
            started_seconds_ago => $started_seconds_ago,
            updated_seconds_ago => $updated_seconds_ago,
        }});

        # Any unfinished job blocks the reload-on-change check in the main loop.
        if ($job_progress ne "100") {
            $anvil->data->{sys}{jobs_running} = 1;
            $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "sys::jobs_running" => $anvil->data->{sys}{jobs_running} }});
        }

        # See if the job was picked up by a now-dead instance.
        if ($job_picked_up_by) {
            # Check if the PID is still active.
            $anvil->System->pids({ignore_me => 1});

            ### TODO: Add a check to verify the job isn't hung.
            # Skip if this job is in progress.
            if (not exists $anvil->data->{pids}{$job_picked_up_by}) {
                # If the job is done, just clear the 'job_picked_up_by' and be done.
                clear_job($anvil, $job_uuid);

                if ($job_progress ne "100") {
                    # The previous job is gone, but the job isn't finished. Start it again.
                    $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "striker_warning_0007", variables => {
                        command => $job_command,
                        pid     => $job_picked_up_by,
                        percent => $job_progress,
                    }});

                    # Clear some variables.
                    $job_progress = 0;
                    $job_status   = "message_0056";
                    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
                        job_progress => $job_progress,
                        job_status   => $job_status,
                    }});
                }

                # Clear the PID
                $job_picked_up_by = 0;
                $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }});
            }
        }

        # Convert the double-banged strings into a proper message.
        my $say_title       = $job_title       ? $anvil->Words->parse_banged_string({debug => 2, key_string => $job_title})       : "";
        my $say_description = $job_description ? $anvil->Words->parse_banged_string({debug => 2, key_string => $job_description}) : "";
        my $say_status      = $job_status      ? $anvil->Words->parse_banged_string({debug => 2, key_string => $job_status})      : "";
        $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
            say_title       => $say_title,
            say_description => $say_description,
            say_status      => $say_status,
        }});

        # Make the status HTML friendly. Strip any embedded HTML then encode the text string.
        if ($say_status) {
            my $html_strip = HTML::Strip->new();
            $say_status = $html_strip->parse($say_status);
            $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_status => $say_status }});

            # Now make the resulting text string HTML friendly
            my $text_to_html = HTML::FromText->new({
                urls  => 1,
                email => 1,
                lines => 1,
            });
            $say_status = $text_to_html->parse($say_status);
            $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_status => $say_status }});
        }

        # Add this to the jobs.json file
        my $json_string = to_json ({
            job_uuid            => $job_uuid,
            job_command         => $job_command,
            job_data            => $job_data,
            job_picked_up_at    => $job_picked_up_at,
            job_updated         => $job_updated,
            job_name            => $job_name,
            job_progress        => $job_progress,
            job_title           => $say_title,
            job_description     => $say_description,
            job_status          => $say_status,
            started_seconds_ago => $started_seconds_ago,
            updated_seconds_ago => $updated_seconds_ago,
        });
        $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { json_string => $json_string }});
        $jobs_file .= $json_string.",\n";

        # If the job is done, move on.
        next if $job_progress eq "100";

        # If the job is not running, start it.
        if ((not $job_picked_up_by) && (not $anvil->data->{switches}{'no-start'})) {
            # Start the job, appending '--job-uuid' to the command. The returned handle is kept so
            # that 'keep_running' can poll it on later passes.
            $anvil->data->{jobs}{handles}{$job_uuid} = $anvil->System->call({
                debug       => 2,
                background  => 1,
                stdout_file => "/tmp/anvil.job.".$job_uuid.".stdout",
                stderr_file => "/tmp/anvil.job.".$job_uuid.".stderr",
                shell_call  => $job_command." --job-uuid ".$job_uuid,
                source      => $THIS_FILE,
                line        => __LINE__,
            });
            $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid} }});

            # Log the PID (the job should update the database).
            my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid();
            $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
        }
    }

    # Close the jobs file, dropping the comma left after the last entry.
    $jobs_file =~ s/,\n$/\n/ms;
    $jobs_file .= "]}\n";
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { jobs_file => $jobs_file }});

    # Write the JSON file
    my $output_json = $anvil->data->{path}{directories}{html}."/status/jobs.json";
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output_json => $output_json }});
    $anvil->Storage->write_file({
        file      => $output_json,
        body      => $jobs_file,
        overwrite => 1,
        mode      => "0644",
        user      => "apache",
        group     => "apache",
    });

    return(0);
}

# This calls 'anvil-update-states', which will scan the local machine's state (hardware and software) and
# record it. Any output from the call is logged.
#
# Parameters;
# - $anvil: The Anvil::Tools object.
#
# Returns '0'.
sub update_state_file {
    my ($anvil) = @_;

    my $shell_call = $anvil->data->{path}{exe}{'anvil-update-states'};
    $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { shell_call => $shell_call }});

    my $states_output = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
    if ($states_output) {
        $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { states_output => $states_output }});
    }

    return(0);
}