Updated anvil-daemon so that, when the reported RAM use is too high, it checks whether an anvil-sync-shared job is running. If one is, it doesn't exit. This fixes an issue where anvil-sync-shared would loop forever because it was constantly being killed while downloading large files.

Signed-off-by: Digimer <digimer@alteeve.ca>
This commit is contained in:
Digimer 2022-02-27 21:29:30 -05:00
parent da0b1f6db1
commit b234b79544
3 changed files with 55 additions and 0 deletions

View File

@ -3854,6 +3854,11 @@ sub get_jobs
job_host_uuid => $job_host_uuid, job_host_uuid => $job_host_uuid,
}}); }});
if (exists $anvil->data->{jobs}{running})
{
delete $anvil->data->{jobs}{running};
}
my $query = " my $query = "
SELECT SELECT
job_uuid, job_uuid,
@ -3937,6 +3942,31 @@ WHERE
job_status => $job_status, job_status => $job_status,
modified_date => $modified_date, modified_date => $modified_date,
}; };
$anvil->data->{jobs}{running}{$job_uuid}{job_command} = $job_command;
$anvil->data->{jobs}{running}{$job_uuid}{job_data} = $job_data;
$anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_by} = $job_picked_up_by;
$anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_at} = $job_picked_up_at;
$anvil->data->{jobs}{running}{$job_uuid}{job_updated} = $job_updated;
$anvil->data->{jobs}{running}{$job_uuid}{job_name} = $job_name;
$anvil->data->{jobs}{running}{$job_uuid}{job_progress} = $job_progress;
$anvil->data->{jobs}{running}{$job_uuid}{job_title} = $job_title;
$anvil->data->{jobs}{running}{$job_uuid}{job_description} = $job_description;
$anvil->data->{jobs}{running}{$job_uuid}{job_status} = $job_status;
$anvil->data->{jobs}{running}{$job_uuid}{modified_date} = $modified_date;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"jobs::running::${job_uuid}::job_command" => $anvil->data->{jobs}{running}{$job_uuid}{job_command},
"jobs::running::${job_uuid}::job_data" => $anvil->data->{jobs}{running}{$job_uuid}{job_data},
"jobs::running::${job_uuid}::job_picked_up_by" => $anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_by},
"jobs::running::${job_uuid}::job_picked_up_at" => $anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_at},
"jobs::running::${job_uuid}::job_updated" => $anvil->data->{jobs}{running}{$job_uuid}{job_updated},
"jobs::running::${job_uuid}::job_name" => $anvil->data->{jobs}{running}{$job_uuid}{job_name},
"jobs::running::${job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$job_uuid}{job_progress},
"jobs::running::${job_uuid}::job_title" => $anvil->data->{jobs}{running}{$job_uuid}{job_title},
"jobs::running::${job_uuid}::job_description" => $anvil->data->{jobs}{running}{$job_uuid}{job_description},
"jobs::running::${job_uuid}::job_status" => $anvil->data->{jobs}{running}{$job_uuid}{job_status},
"jobs::running::${job_uuid}::modified_date" => $anvil->data->{jobs}{running}{$job_uuid}{modified_date},
}});
} }
my $return_count = @{$return}; my $return_count = @{$return};

View File

@ -3123,6 +3123,7 @@ We will sleep a bit and try again.
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key> <key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key> <key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key> <key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
<key name="warning_0139">[ Warning ] - We're using: [#!variable!ram_used!#] (#!variable!ram_used_bytes!# Bytes), but the job: [#!variable!job_command!#] is running, which might be why the RAM use is high. NOT exiting while this program is running.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. --> <!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list' to find new entries. --> <!-- Run 'striker-parse-os-list' to find new entries. -->

View File

@ -274,6 +274,30 @@ sub check_ram
}}); }});
if ($problem) if ($problem)
{ {
# See if an 'anvil-sync-shared' job is running and, if so, don't exit. The file copy is
# counted towards the reported RAM use and is not an actual problem.
$anvil->Database->get_jobs({debug => 2});
foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}})
{
my $job_command = $anvil->data->{jobs}{running}{$job_uuid}{job_command};
my $job_progress = $anvil->data->{jobs}{running}{$job_uuid}{job_progress};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
job_command => $job_command,
job_progress => $job_progress,
}});
if (($job_progress != 100) && ($job_command =~ /anvil-sync-shared/))
{
# Don't abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0139", variables => {
job_command => $job_command,
ram_used => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
}});
return(0);
}
}
# Send an alert and exit. # Send an alert and exit.
$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => { $anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
program => $THIS_FILE, program => $THIS_FILE,