* In the never-ending attempt to resolve the build consistency issues, this commit enables extra debug logging and, hopefully, fixes a bug in anvil-daemon where a job could be started repeatedly.

* Renamed the special job status 'scancore_startup' to 'anvil_startup', given it's handled by anvil-daemon.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent 3080b2c0fd
commit 023f43eda9
  1. 2
      Anvil/Tools/Database.pm
  2. 1
      share/words.xml
  3. 8
      tools/anvil-configure-host
  4. 40
      tools/anvil-daemon
  5. 2
      tools/striker-auto-initialize-all

@ -7849,7 +7849,7 @@ Variables can not be passed to this title key.
* This is not required when C<< update_progress_only >> is set * This is not required when C<< update_progress_only >> is set
B<< Note >>: This can be set to the special C<< scancore_startup >>. When the job status is set to this value, the job will only run when ScanCore next starts up (generally after a reboot). B<< Note >>: This can be set to the special C<< anvil_startup >>. When the job status is set to this value, the job will only run when ScanCore next starts up (generally after a reboot).
=head3 job_uuid (optional) =head3 job_uuid (optional)

@ -1831,6 +1831,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0636">No servers are running on either node. Skipping fence delay preference checks for now.</key> <key name="log_0636">No servers are running on either node. Skipping fence delay preference checks for now.</key>
<key name="log_0637">We've got: [#!variable!local_server_count!#] servers, and the peer has: [#!variable!peer_server_count!#] servers. Skipping fence delay preference checks for now.</key> <key name="log_0637">We've got: [#!variable!local_server_count!#] servers, and the peer has: [#!variable!peer_server_count!#] servers. Skipping fence delay preference checks for now.</key>
<key name="log_0638">We're hosting servers, and our peer is not. Making the fence delay favours this node.</key> <key name="log_0638">We're hosting servers, and our peer is not. Making the fence delay favours this node.</key>
<key name="log_0639">The Anvil! daemon is in startup mode, and the job: [#!variable!job_uuid!#], command: [#!variable!job_command!#] is not a startup job, ignoring it for now.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -1459,7 +1459,7 @@ sub pickup_job_details
check => 1, check => 1,
job_uuid => $anvil->data->{switches}{'job-uuid'}, job_uuid => $anvil->data->{switches}{'job-uuid'},
}); });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'return' => $return }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'return' => $return }});
if ($return == 1) if ($return == 1)
{ {
# It's not a valid UUID. # It's not a valid UUID.
@ -1474,7 +1474,7 @@ sub pickup_job_details
# Still alive? Good. # Still alive? Good.
my $job_picked_up_by = $anvil->data->{jobs}{job_picked_up_by}; my $job_picked_up_by = $anvil->data->{jobs}{job_picked_up_by};
my $job_progress = $anvil->data->{jobs}{job_progress}; my $job_progress = $anvil->data->{jobs}{job_progress};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_picked_up_by => $job_picked_up_by, job_picked_up_by => $job_picked_up_by,
job_progress => $job_progress, job_progress => $job_progress,
}}); }});
@ -1483,7 +1483,7 @@ sub pickup_job_details
if ($job_picked_up_by) if ($job_picked_up_by)
{ {
# The previous job is gone if we're still alive, we'll take this over. # The previous job is gone if we're still alive, we'll take this over.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, 'print' => 1, key => "log_0147", variables => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, key => "log_0147", variables => {
pid => $job_picked_up_by, pid => $job_picked_up_by,
percent => $job_progress, percent => $job_progress,
}}); }});
@ -1512,7 +1512,7 @@ AND
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results}; my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
results => $results, results => $results,
count => $count, count => $count,
}}); }});

@ -971,7 +971,7 @@ AND
# This handles weird bits for things like bug work-arounds. # This handles weird bits for things like bug work-arounds.
handle_special_cases($anvil); handle_special_cases($anvil);
# Now look for jobs that have a job status of 'scancore_startup' # Now look for jobs that have a job status of 'anvil_startup'
run_jobs($anvil, 1); run_jobs($anvil, 1);
# Check the firewall needs to be updated. # Check the firewall needs to be updated.
@ -1150,6 +1150,16 @@ sub run_jobs
updated_seconds_ago => $updated_seconds_ago, updated_seconds_ago => $updated_seconds_ago,
}}); }});
# If this is a start-up call, only start jobs whose status is 'anvil_startup'.
if (($startup) && ($say_status ne "anvil_startup"))
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0639", variables => {
job_uuid => $job_uuid,
job_command => $job_command,
}});
next;
}
if ($job_progress ne "100") if ($job_progress ne "100")
{ {
$anvil->data->{sys}{jobs_running} = 1; $anvil->data->{sys}{jobs_running} = 1;
@ -1175,7 +1185,7 @@ sub run_jobs
if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid})) if ((not exists $anvil->data->{lost_job_count}{$job_uuid}) or (not defined $anvil->data->{lost_job_count}{$job_uuid}))
{ {
$anvil->data->{lost_job_count}{$job_uuid} = 0; $anvil->data->{lost_job_count}{$job_uuid} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
} }
if ($anvil->data->{lost_job_count}{$job_uuid} > 5) if ($anvil->data->{lost_job_count}{$job_uuid} > 5)
{ {
@ -1189,26 +1199,26 @@ sub run_jobs
# Clear some variables. # Clear some variables.
$job_progress = 0; $job_progress = 0;
$job_status = "message_0056"; $job_status = "message_0056";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_progress => $job_progress, job_progress => $job_progress,
job_status => $job_status, job_status => $job_status,
}}); }});
# Clear the job. # Clear the job.
$anvil->Job->clear({debug => 3, job_uuid => $job_uuid}); $anvil->Job->clear({debug => 2, job_uuid => $job_uuid});
$anvil->data->{lost_job_count}{$job_uuid} = 0; $anvil->data->{lost_job_count}{$job_uuid} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
} }
else else
{ {
$anvil->data->{lost_job_count}{$job_uuid}++; $anvil->data->{lost_job_count}{$job_uuid}++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "lost_job_count::${job_uuid}" => $anvil->data->{lost_job_count}{$job_uuid} }});
} }
} }
# Clear the PID # Clear the PID
$job_picked_up_by = 0; $job_picked_up_by = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { job_picked_up_by => $job_picked_up_by }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_picked_up_by => $job_picked_up_by }});
} }
} }
@ -1259,9 +1269,10 @@ sub run_jobs
# If the job is done, move on. # If the job is done, move on.
next if $job_progress eq "100"; next if $job_progress eq "100";
next if $anvil->data->{switches}{'no-start'};
# If 'startup' is set, we only care if 'job_status' is 'scancore_startup' # If 'startup' is set, we only care if 'job_status' is 'anvil_startup'
if ((not $startup) && ($say_status eq "scancore_startup")) if ((not $startup) && ($say_status eq "anvil_startup"))
{ {
# Skip this, it will run next time anvil-daemon restarts. # Skip this, it will run next time anvil-daemon restarts.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0593", variables => {
@ -1272,7 +1283,7 @@ sub run_jobs
} }
# If the job is not running, start it. # If the job is not running, start it.
if ((not $job_picked_up_by) && ($job_progress ne "100") && (not $anvil->data->{switches}{'no-start'})) if (not $job_picked_up_by)
{ {
my $command = $job_command." --job-uuid ".$job_uuid; my $command = $job_command." --job-uuid ".$job_uuid;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0210", variables => { command => $command }});
@ -1303,15 +1314,18 @@ sub run_jobs
source => $THIS_FILE, source => $THIS_FILE,
line => __LINE__, line => __LINE__,
}); });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { "jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid}, return_code => $return_code }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"jobs::handles::${job_uuid}" => $anvil->data->{jobs}{handles}{$job_uuid},
return_code => $return_code,
}});
# Log the PID (the job should update the database). # Log the PID (the job should update the database).
my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid(); my $pid = $anvil->data->{jobs}{handles}{$job_uuid}->pid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { pid => $pid }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
# Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute. # Record that we've tried to start this job, so that we don't try to restart it for any reason for at least a minute.
$anvil->data->{jobs}{$job_uuid}{started} = time; $anvil->data->{jobs}{$job_uuid}{started} = time;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'jobs::$job_uuid::started' => $anvil->data->{jobs}{$job_uuid}{started} }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'jobs::$job_uuid::started' => $anvil->data->{jobs}{$job_uuid}{started} }});
} }
} }

@ -1685,7 +1685,7 @@ sub striker_stage1
job_name => "configure::auto_initialize", job_name => "configure::auto_initialize",
job_title => "job_0225", job_title => "job_0225",
job_description => "job_0226", job_description => "job_0226",
job_status => "scancore_startup", job_status => "anvil_startup",
job_progress => 0, job_progress => 0,
}); });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});

Loading…
Cancel
Save