From 023f43eda93d3aaf78f38068eb8ad3fe37bd19c8 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 22 Jun 2021 16:08:25 -0400 Subject: [PATCH] * In the never-ending attempt to resolve the build consistency issues, this commit enables extra debugging logging and, hopefully, implements a fix in anvil-daemon where a job could be started repeatedly. * Renamed the special job status 'scancore_startup' to 'anvil_startup', given it's handled by anvil-daemon. Signed-off-by: Digimer --- Anvil/Tools/Database.pm | 2 +- share/words.xml | 1 + tools/anvil-configure-host | 8 +++---- tools/anvil-daemon | 40 +++++++++++++++++++++---------- tools/striker-auto-initialize-all | 2 +- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 70006683..23f256d4 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -7849,7 +7849,7 @@ Variables can not be passed to this title key. * This is not required when C<< update_progress_only >> is set -B<< Note >>: This can be set to the special C<< scancore_startup >>. When the job status is set to this value, the job will only run when ScanCore next starts up (generally after a reboot). +B<< Note >>: This can be set to the special C<< anvil_startup >>. When the job status is set to this value, the job will only run when anvil-daemon next starts up (generally after a reboot). =head3 job_uuid (optional) diff --git a/share/words.xml b/share/words.xml index aa545227..69c87884 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1831,6 +1831,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: No servers are running on either node. Skipping fence delay preference checks for now. We've got: [#!variable!local_server_count!#] servers, and the peer has: [#!variable!peer_server_count!#] servers. Skipping fence delay preference checks for now. We're hosting servers, and our peer is not. Making the fence delay favours this node. + The Anvil! 
daemon is in startup mode, and the job: [#!variable!job_uuid!#], command: [#!variable!job_command!#] is not a startup job, ignoring it for now. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-configure-host b/tools/anvil-configure-host index fb6c4499..583b8adc 100755 --- a/tools/anvil-configure-host +++ b/tools/anvil-configure-host @@ -1459,7 +1459,7 @@ sub pickup_job_details check => 1, job_uuid => $anvil->data->{switches}{'job-uuid'}, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'return' => $return }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'return' => $return }}); if ($return == 1) { # It's not a valid UUID. @@ -1474,7 +1474,7 @@ sub pickup_job_details # Still alive? Good. my $job_picked_up_by = $anvil->data->{jobs}{job_picked_up_by}; my $job_progress = $anvil->data->{jobs}{job_progress}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by, job_progress => $job_progress, }}); @@ -1483,7 +1483,7 @@ sub pickup_job_details if ($job_picked_up_by) { # The previous job is gone if we're still alive, we'll take this over. 
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, 'print' => 1, key => "log_0147", variables => { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, key => "log_0147", variables => { pid => $job_picked_up_by, percent => $job_progress, }}); @@ -1512,7 +1512,7 @@ AND my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { results => $results, count => $count, }}); diff --git a/tools/anvil-daemon b/tools/anvil-daemon index b420a529..2ef5a479 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -971,7 +971,7 @@ AND # This handles weird bits for things like bug work-arounds. handle_special_cases($anvil); - # Now look for jobs that have a job status of 'scancore_startup' + # Now look for jobs that have a job status of 'anvil_startup' run_jobs($anvil, 1); # Check the firewall needs to be updated. @@ -1150,6 +1150,16 @@ sub run_jobs updated_seconds_ago => $updated_seconds_ago, }}); + # If this is a start-up call, only start jobs whose status is 'anvil_startup'. 
+ if (($startup) && ($say_status ne "anvil_startup")) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => { + job_uuid => $job_uuid, + job_command => $job_command, + }}); + next; + } + if ($job_progress ne "100") { $anvil->data->{sys}{jobs_running} = 1; @@ -1175,7 +1185,7 @@ sub run_jobs if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid})) { $anvil->data->{lost_job_count}{$job_uuid} = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); } if ($anvil->data->{lost_job_count}{$job_uuid} > 5) { @@ -1189,26 +1199,26 @@ sub run_jobs # Clear some variables. $job_progress = 0; $job_status = "message_0056"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_progress => $job_progress, job_status => $job_status, }}); # Clear the job. 
- $anvil->Job->clear({debug => 3, job_uuid => $job_uuid}); + $anvil->Job->clear({debug => 2, job_uuid => $job_uuid}); $anvil->data->{lost_job_count}{$job_uuid} = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); } else { $anvil->data->{lost_job_count}{$job_uuid}++; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); } } # Clear the PID $job_picked_up_by = 0; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { job_picked_up_by => $job_picked_up_by }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }}); } } @@ -1259,9 +1269,10 @@ sub run_jobs # If the job is done, move on. next if $job_progress eq "100"; + next if $anvil->data->{switches}{'no-start'}; - # If 'startup' is set, we only care if 'job_status' is 'scancore_startup' - if ((not $startup) && ($say_status eq "scancore_startup")) + # If 'startup' is set, we only care if 'job_status' is 'anvil_startup' + if ((not $startup) && ($say_status eq "anvil_startup")) { # Skip this, it will run next time anvil-daemon restarts. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => { @@ -1272,7 +1283,7 @@ sub run_jobs } # If the job is not running, start it. 
- if ((not $job_picked_up_by) && ($job_progress ne "100") && (not $anvil->data->{switches}{'no-start'})) + if (not $job_picked_up_by) { my $command = $job_command." --job-uuid ".$job_uuid; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }}); @@ -1303,15 +1314,18 @@ sub run_jobs source => $THIS_FILE, line => __LINE__, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, return_code => $return_code }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, + return_code => $return_code, + }}); # Log the PID (the job should update the database). my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { pid => $pid }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }}); # Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute. 
$anvil->data->{jobs}{$job_uuid}{started} = time; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'jobs::$job_uuid::started' => $anvil->data->{jobs}{$job_uuid}{started} }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'jobs::$job_uuid::started' => $anvil->data->{jobs}{$job_uuid}{started} }}); } } diff --git a/tools/striker-auto-initialize-all b/tools/striker-auto-initialize-all index e5149c22..c6a58c7c 100755 --- a/tools/striker-auto-initialize-all +++ b/tools/striker-auto-initialize-all @@ -1685,7 +1685,7 @@ sub striker_stage1 job_name => "configure::auto_initialize", job_title => "job_0225", job_description => "job_0226", - job_status => "scancore_startup", + job_status => "anvil_startup", job_progress => 0, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});