Updated anvil-daemon to check whether anvil-sync-shared is running when the reported RAM use is too high. If it is, anvil-daemon doesn't exit. This fixes an issue where anvil-sync-shared would loop forever because it was constantly being killed while downloading large files.
Signed-off-by: Digimer <digimer@alteeve.ca>
This commit is contained in:
parent
da0b1f6db1
commit
b234b79544
@ -3854,6 +3854,11 @@ sub get_jobs
|
|||||||
job_host_uuid => $job_host_uuid,
|
job_host_uuid => $job_host_uuid,
|
||||||
}});
|
}});
|
||||||
|
|
||||||
|
if (exists $anvil->data->{jobs}{running})
|
||||||
|
{
|
||||||
|
delete $anvil->data->{jobs}{running};
|
||||||
|
}
|
||||||
|
|
||||||
my $query = "
|
my $query = "
|
||||||
SELECT
|
SELECT
|
||||||
job_uuid,
|
job_uuid,
|
||||||
@ -3937,6 +3942,31 @@ WHERE
|
|||||||
job_status => $job_status,
|
job_status => $job_status,
|
||||||
modified_date => $modified_date,
|
modified_date => $modified_date,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_command} = $job_command;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_data} = $job_data;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_by} = $job_picked_up_by;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_at} = $job_picked_up_at;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_updated} = $job_updated;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_name} = $job_name;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_progress} = $job_progress;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_title} = $job_title;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_description} = $job_description;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{job_status} = $job_status;
|
||||||
|
$anvil->data->{jobs}{running}{$job_uuid}{modified_date} = $modified_date;
|
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
|
||||||
|
"jobs::running::${job_uuid}::job_command" => $anvil->data->{jobs}{running}{$job_uuid}{job_command},
|
||||||
|
"jobs::running::${job_uuid}::job_data" => $anvil->data->{jobs}{running}{$job_uuid}{job_data},
|
||||||
|
"jobs::running::${job_uuid}::job_picked_up_by" => $anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_by},
|
||||||
|
"jobs::running::${job_uuid}::job_picked_up_at" => $anvil->data->{jobs}{running}{$job_uuid}{job_picked_up_at},
|
||||||
|
"jobs::running::${job_uuid}::job_updated" => $anvil->data->{jobs}{running}{$job_uuid}{job_updated},
|
||||||
|
"jobs::running::${job_uuid}::job_name" => $anvil->data->{jobs}{running}{$job_uuid}{job_name},
|
||||||
|
"jobs::running::${job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$job_uuid}{job_progress},
|
||||||
|
"jobs::running::${job_uuid}::job_title" => $anvil->data->{jobs}{running}{$job_uuid}{job_title},
|
||||||
|
"jobs::running::${job_uuid}::job_description" => $anvil->data->{jobs}{running}{$job_uuid}{job_description},
|
||||||
|
"jobs::running::${job_uuid}::job_status" => $anvil->data->{jobs}{running}{$job_uuid}{job_status},
|
||||||
|
"jobs::running::${job_uuid}::modified_date" => $anvil->data->{jobs}{running}{$job_uuid}{modified_date},
|
||||||
|
}});
|
||||||
}
|
}
|
||||||
|
|
||||||
my $return_count = @{$return};
|
my $return_count = @{$return};
|
||||||
|
@ -3123,6 +3123,7 @@ We will sleep a bit and try again.
|
|||||||
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
|
<key name="warning_0136">[ Warning ] - Failed to connect to the host: [#!variable!host!#]! Unable to up the resource, so the server may not start. If the peer can't be recovered, manually forcing the local resource(s) to UpToDate may be required.</key>
|
||||||
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
|
<key name="warning_0137">[ Warning ] - Timed out waiting for the connections to the peers, and the local resource(s) is not in 'UpToDate' state. Booting the server will likely fail.</key>
|
||||||
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
|
<key name="warning_0138">[ Warning ] - Timed out waiting for the connections to the peers.</key>
|
||||||
|
<key name="warning_0139">[ Warning ] - We're using: [#!variable!ram_used!#] (#!variable!ram_used_bytes!# Bytes), but the job: [#!variable!job_command!#] is running, which might be why the RAM use is high. NOT exiting while this program is running.</key>
|
||||||
|
|
||||||
<!-- The entries below here are not sequential, but use a key to find the entry. -->
|
<!-- The entries below here are not sequential, but use a key to find the entry. -->
|
||||||
<!-- Run 'striker-parse-os-list' to find new entries. -->
|
<!-- Run 'striker-parse-os-list' to find new entries. -->
|
||||||
|
@ -274,6 +274,30 @@ sub check_ram
|
|||||||
}});
|
}});
|
||||||
if ($problem)
|
if ($problem)
|
||||||
{
|
{
|
||||||
|
# See if an 'anvil-sync-shared' job is running and, if so, don't exit. The file copy is
|
||||||
|
# counted as RAM use and is not an actual problem.
|
||||||
|
$anvil->Database->get_jobs({debug => 2});
|
||||||
|
foreach my $job_uuid (keys %{$anvil->data->{jobs}{running}})
|
||||||
|
{
|
||||||
|
my $job_command = $anvil->data->{jobs}{running}{$job_uuid}{job_command};
|
||||||
|
my $job_progress = $anvil->data->{jobs}{running}{$job_uuid}{job_progress};
|
||||||
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
||||||
|
job_command => $job_command,
|
||||||
|
job_progress => $job_progress,
|
||||||
|
}});
|
||||||
|
|
||||||
|
if (($job_progress != 100) && ($job_command =~ /anvil-sync-shared/))
|
||||||
|
{
|
||||||
|
# Don't abort.
|
||||||
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "alert", key => "warning_0139", variables => {
|
||||||
|
job_command => $job_command,
|
||||||
|
ram_used => $anvil->Convert->bytes_to_human_readable({'bytes' => $ram_used}),
|
||||||
|
ram_used_bytes => $anvil->Convert->add_commas({number => $ram_used}),
|
||||||
|
}});
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# Send an alert and exit.
|
# Send an alert and exit.
|
||||||
$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
|
$anvil->Alert->register({alert_level => "notice", message => "error_0357", variables => {
|
||||||
program => $THIS_FILE,
|
program => $THIS_FILE,
|
||||||
|
Loading…
Reference in New Issue
Block a user