You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
475 lines
17 KiB
475 lines
17 KiB
#!/usr/bin/perl |
|
# |
|
# This takes a URL (ftp, http or https) and downloads the file. If it is called without --url, it shows the |
|
# progress of any other instances currently downloading files. |
|
# |
|
# Return codes: |
|
# 0 = Normal exit. |
|
# 1 = No database connections available |
|
# 2 = The requested URL was not found on the remote server. |
|
# 3 = The requested URL does not resolve to a known domain. |
|
# 4 = The requested URL failed because the remote host refused the connection. |
|
# 5 = The requested URL failed because there is no route to that host. |
|
# 6 = Abort requested, but UUID or PID not passed |
|
# 7 = The requested URL failed because the network is unreachable. |
|
# 8 = The file to download already exists. |
|
# 9 = Something went wrong moving the file from temp to the output directory. |
|
# 10 = The requested URL does not use a supported protocol (ftp, http or https).
|
# 11 = The --job-uuid is invalid |
|
# 12 = The --job-uuid is already being handled by another process |
|
# |
|
# |
|
# TODO: |
|
# - |
|
# |
|
# NOTE: |
|
# - |
|
# |
|
|
|
use strict; |
|
use warnings; |
|
use Anvil::Tools; |
|
use Data::Dumper; |
|
|
|
# Disable output buffering so that anything printed is emitted immediately.
$| = 1;

# Work out this program's file name and the directory it was called from. A leading '.' in the directory is
# replaced with the caller's working directory when PWD is available.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Get the Anvil::Tools handle and configure logging.
my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});

# Connect to the database(s). Without at least one connection we can't go on, so exit with '1'.
$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
	$anvil->nice_exit({exit_code => 1});
}

# Set the switch defaults, then read in whatever was passed on the command line.
$anvil->data->{switches}{abort}      = "";
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{overwrite}  = "";
$anvil->data->{switches}{'save-to'}  = "";	# /mnt/shared/files by default
$anvil->data->{switches}{script}     = "";
$anvil->data->{switches}{url}        = "";
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	'switches::abort'    => $anvil->data->{switches}{abort}, 
	'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, 
	'switches::overwrite' => $anvil->data->{switches}{overwrite}, 
	'switches::save-to'  => $anvil->data->{switches}{'save-to'}, 
	'switches::script'   => $anvil->data->{switches}{script}, 
	'switches::url'      => $anvil->data->{switches}{url}, 
}});

# If I don't have --abort or --url, see if there is a job waiting. When one is found, its job data is fed 
# into the switches hash, so the checks below may see an 'abort' or 'url' that wasn't on the command line.
if ((not $anvil->data->{switches}{abort}) && (not $anvil->data->{switches}{url}))
{
	get_job_details($anvil);
}

# NOTE: An unconditional 'die;' used to sit here. It stopped the program before any of the work below could
#       run, so it has been removed.

# Do what now?
if ($anvil->data->{switches}{abort})
{
	# Kill the other download
	abort_download($anvil);
}
elsif ($anvil->data->{switches}{url})
{
	# Try to download the file
	download_file($anvil);
}
else
{
	# Show the status of any downloading, finished, failed or aborted downloads.
	show_status($anvil);
}

# We're done
$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
############################################################################################################# |
|
# Private functions. # |
|
############################################################################################################# |
|
|
|
# This loads a job's details, or looks for unclaimed jobs that aren't finished.
# 
# If 'switches::job-uuid' is not set, 'Job->get_job_uuid' is asked for a job for this program and, if it 
# returns a valid UUID, that is stored in 'switches::job-uuid'. If a job UUID is then available, the job's
# details are loaded, its comma-separated 'variable=value' job data is copied into the switches hash, any
# stale 'job_picked_up_by' is cleared and the job's progress is updated.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Returns '0'. The program exits with '11' if the job UUID is reported as bad, and with '12' if the job is
# reported as already being handled by another process.
sub get_job_details
{
	my ($anvil) = @_;
	
	# If I don't have a job-uuid, see if any jobs are pending
	if (not $anvil->data->{switches}{'job-uuid'})
	{
		my $job_uuid = $anvil->Job->get_job_uuid({debug => 2, program => $THIS_FILE});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
		if ($anvil->Validate->is_uuid({uuid => $job_uuid}))
		{
			# Got one!
			$anvil->data->{switches}{'job-uuid'} = $job_uuid;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'} }});
		}
	}
	
	# If we've got a job-uuid, load the details.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'} }});
	if ($anvil->data->{switches}{'job-uuid'})
	{
		my $problem = $anvil->Job->get_job_details({
			debug => 2, 
			check => 1, 
		});
		# If 'problem' is '1', the job-uuid is bad. If it's '2', it's already being handled by 
		# another process. In either case, we're not going to handle this.
		if (not $problem)
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				problem                  => $problem, 
				"jobs::job_uuid"         => $anvil->data->{jobs}{job_uuid}, 
				"jobs::job_host_uuid"    => $anvil->data->{jobs}{job_host_uuid}, 
				"jobs::job_command"      => $anvil->data->{jobs}{job_command}, 
				"jobs::job_data"         => $anvil->data->{jobs}{job_data}, 
				"jobs::job_picked_up_by" => $anvil->data->{jobs}{job_picked_up_by}, 
				"jobs::job_picked_up_at" => $anvil->data->{jobs}{job_picked_up_at}, 
				"jobs::job_updated"      => $anvil->data->{jobs}{job_updated}, 
				"jobs::job_name"         => $anvil->data->{jobs}{job_name}, 
				"jobs::job_progress"     => $anvil->data->{jobs}{job_progress}, 
				"jobs::job_title"        => $anvil->data->{jobs}{job_title}, 
				"jobs::job_description"  => $anvil->data->{jobs}{job_description}, 
				"jobs::job_status"       => $anvil->data->{jobs}{job_status}, 
			}});
			
			# Store the job-uuid in the switches hash.
			$anvil->data->{switches}{'job-uuid'} = $anvil->data->{jobs}{job_uuid};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }});
		}
		elsif ($problem eq "1")
		{
			# Bad UUID
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0061", variables => { job_uuid => $anvil->data->{jobs}{job_uuid} }});
			$anvil->nice_exit({exit_code => 11});
		}
		else
		{
			# Job is already being handled by another active process.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0062", variables => { job_uuid => $anvil->data->{jobs}{job_uuid} }});
			$anvil->nice_exit({exit_code => 12});
		}
		
		# Pull apart the job-data and feed them into the switches hash
		if ($anvil->data->{switches}{'job-uuid'})
		{
			foreach my $pair (split/,/, $anvil->data->{jobs}{job_data})
			{
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pair => $pair }});
				
				my ($variable, $value) = ($pair =~ /^(.*?)=(.*)$/);
				if ((not defined $variable) or ($variable eq ""))
				{
					# This isn't a 'variable=value' pair, so there is no switch name to 
					# store it under. Skip it instead of using an undefined hash key.
					next;
				}
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					variable => $variable,
					value    => $value, 
				}});
				
				$anvil->data->{switches}{$variable} = $value;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::${variable}" => $anvil->data->{switches}{$variable} }});
			}
		}
		
		# If "job_picked_up_by" is set, clear it. (It won't be running because we did 'check' 
		# earlier)
		if ($anvil->data->{jobs}{job_picked_up_by})
		{
			$anvil->Job->clear({
				debug    => 2, 
				job_uuid => $anvil->data->{switches}{'job-uuid'}, 
			});
		}
		
		# Mark it as 1% done.
		# NOTE(review): No progress value is passed here, so this relies on whatever 
		#               'Job->update_progress' does by default - confirm that is the intended 1%.
		$anvil->Job->update_progress({
			debug    => 2, 
			job_uuid => $anvil->data->{switches}{'job-uuid'}, 
		});
	}
	
	return(0);
}
|
|
|
# This downloads the file at 'switches::url' using wget, printing a status line at regular intervals while
# the download runs. The file is first saved under 'path::directories::shared::temp' and, once the download
# finishes, moved into 'switches::save-to' (or 'path::directories::shared::files' when '--save-to' wasn't
# used). If '--script' was used, the downloaded file is then made executable.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Returns '0' on success. On failure, the program exits with;
# -  2 = wget reported a 404 / Not Found
# -  3 = The host name could not be resolved
# -  4 = The connection was refused
# -  5 = No route to the host
# -  7 = The network is unreachable
# -  8 = The output file already exists and '--overwrite' was not used
# -  9 = The downloaded file was not available to move, or moving it to the output directory failed
# - 10 = The URL is not an ftp, http or https URL
sub download_file
{
	my ($anvil) = @_;
	
	my $failed = 0;
	my $url    = $anvil->data->{switches}{url};
	
	# Is this a supported protocol? This is checked first because everything below assumes the URL 
	# contains at least one '/'.
	if (($url !~ /^ftp\:\/\//) && ($url !~ /^http\:\/\//) && ($url !~ /^https\:\/\//))
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0286", variables => { url => $url }});
		$anvil->nice_exit({exit_code => 10});
	}
	
	# The file name is everything after the last '/' in the URL.
	my $file_name = ($url =~ /^.*\/(.*)$/)[0];
	my $temp_file = $anvil->data->{path}{directories}{shared}{temp}."/".$file_name;
	my $save_to   = $anvil->data->{switches}{'save-to'} ? $anvil->data->{switches}{'save-to'} : $anvil->data->{path}{directories}{shared}{files};
	
	# Collapse any runs of '/' (ie: from a trailing slash on '--save-to') down to a single '/', in both
	# the directory and the final file path.
	$save_to =~ s/\/{2,}/\//g;
	my $out_file = $save_to."/".$file_name;
	$out_file =~ s/\/{2,}/\//g;
	my $unix_start = time;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		url        => $url, 
		file_name  => $file_name, 
		temp_file  => $temp_file, 
		unix_start => $unix_start, 
		save_to    => $save_to, 
		out_file   => $out_file, 
	}});
	
	# If the target file exists, exit.
	if ((-e $out_file) && (not $anvil->data->{switches}{overwrite}))
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "err", key => "error_0059", variables => { file => $out_file }});
		$anvil->nice_exit({exit_code => 8});
	}
	
	# Make sure the output and temporary directories exist. The result of creating them is only logged;
	# if a directory could not be made, the download or the move below will fail and be reported then.
	foreach my $directory ($save_to, $anvil->data->{path}{directories}{shared}{temp})
	{
		next if -e $directory;
		my $make_failed = $anvil->Storage->make_directory({
			debug     => 2,
			directory => $directory, 
			mode      => "0777", 
			user      => "apache", 
			group     => "apache",
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $make_failed }});
	}
	
	# The URL and the temporary file name come from the user (or the job data) and are handed to a 
	# shell, so wrap them in single quotes. Without this, a URL with a query string ('...?a=1&b=2') 
	# would be split at the '&' by the shell.
	my $shell_quote = sub {
		my ($string) = @_;
		$string =~ s/'/'\\''/g;
		return("'".$string."'");
	};
	
	### NOTE: We don't use System->call because we need to track the output in real time.
	# Try to download it.
	my $bytes_downloaded = 0;
	my $downloaded       = 0;	# KiB, as reported by wget
	my $percent          = 0;
	my $rate             = 0;	# Human readable rate, as reported by wget
	my $time_left        = 0;	# Human readable time left, as reported by wget
	my $running_time     = 0;
	my $average_rate     = 0;
	my $report_interval  = 5;	# Seconds between status file update
	my $next_report      = time + $report_interval;
	my $shell_call       = $anvil->data->{path}{exe}{wget}." --continue --progress=dot:binary ".$shell_quote->($url)." --output-document ".$shell_quote->($temp_file);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		next_report => $next_report,
		shell_call  => $shell_call, 
	}});
	
	# wget writes its progress to STDERR, so that is merged into STDOUT for us to read.
	my $file_handle;
	if (not open($file_handle, "-|", $shell_call." 2>&1"))
	{
		# We couldn't run wget at all, so there is nothing to read.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
		$failed = 9;
	}
	else
	{
		while (my $raw_line = <$file_handle>)
		{
			chomp $raw_line;
			my $line = $anvil->Words->clean_spaces({string => $raw_line});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0017", variables => { line => $line }});
			
			# Check for problems
			if (($line =~ /404/) && ($line =~ /Not Found/i))
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0287", variables => { url => $url }});
				$failed = 2;
			}
			elsif ($line =~ /Name or service not known/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0288", variables => { url => $url }});
				$failed = 3;
			}
			elsif ($line =~ /Connection refused/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0289", variables => { url => $url }});
				$failed = 4;
			}
			elsif ($line =~ /route to host/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0290", variables => { url => $url }});
				$failed = 5;
			}
			elsif ($line =~ /Network is unreachable/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0291", variables => { url => $url }});
				$failed = 7;
			}
			elsif ($line =~ /^(\d+)K .*? (\d+)% (.*?) (\d+.*)$/)
			{
				# This is a progress line; KiB so far, percent done, current rate and time left.
				$downloaded = $1;
				$percent    = $2;
				$rate       = $3;
				$time_left  = $4;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					downloaded => $downloaded,
					percent    => $percent,
					rate       => $rate,
					time_left  => $time_left,
				}});
				
				### NOTE: According to: http://savannah.gnu.org/bugs/index.php?22765, wget uses base-2.
				# Convert
				   $bytes_downloaded = $downloaded * 1024;
				my $say_downloaded   = $anvil->Convert->bytes_to_human_readable({debug => 3, 'bytes' => $bytes_downloaded});
				my $say_percent      = $percent."%";
				my $byte_rate        = $anvil->Convert->human_readable_to_bytes({debug => 3, size => $rate, base2 => 1});
				my $say_rate         = $anvil->Convert->bytes_to_human_readable({debug => 3, 'bytes' => $byte_rate})."/s";
				   $running_time     = time - $unix_start;
				my $say_running_time = $anvil->Convert->time({debug => 3, 'time' => $running_time, translate => 1});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					bytes_downloaded => $bytes_downloaded,
					say_downloaded   => $say_downloaded,
					say_percent      => $say_percent,
					byte_rate        => $byte_rate,
					say_rate         => $say_rate,
					running_time     => $running_time,
					say_running_time => $say_running_time,
				}});
				
				# Time left is a bit more complicated; it is a string like '1d 2h', '3m 4s', 
				# etc, so each unit is pulled out on its own.
				my $days    = 0;
				my $hours   = 0;
				my $minutes = 0;
				my $seconds = 0;
				if ($time_left =~ /(\d+)d/)
				{
					$days = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { days => $days }});
				}
				if ($time_left =~ /(\d+)h/)
				{
					$hours = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hours => $hours }});
				}
				if ($time_left =~ /(\d+)m/)
				{
					$minutes = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { minutes => $minutes }});
				}
				if ($time_left =~ /(\d+)s/)
				{
					$seconds = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { seconds => $seconds }});
				}
				my $seconds_left  = (($days * 86400) + ($hours * 3600) + ($minutes * 60) + $seconds);
				my $say_time_left = $anvil->Convert->time({debug => 3, 'time' => $seconds_left, suffix => "long", translate => 1});
				
				# Avoid a divide-by-zero in the first second of the download.
				$running_time = 1 if not $running_time;
				$average_rate = int($bytes_downloaded / $running_time);
				my $say_average_rate = $anvil->Convert->bytes_to_human_readable({debug => 3, 'bytes' => $average_rate})."/s";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					seconds_left     => $seconds_left,
					say_time_left    => $say_time_left,
					running_time     => $running_time, 
					average_rate     => $average_rate, 
					say_average_rate => $say_average_rate,
				}});
				
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					'time'      => time,
					next_report => $next_report,
				}});
				if (time > $next_report)
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						say_downloaded   => $say_downloaded,
						percent          => $percent,
						say_rate         => $say_rate,
						running_time     => $running_time,
						say_running_time => $say_running_time,
						seconds_left     => $seconds_left,
						say_time_left    => $say_time_left,
						say_percent      => $say_percent,
						say_average_rate => $say_average_rate,
						average_rate     => $average_rate,
					}});
					
					# This is the machine-readable status line printed for the caller.
					my $status_line =  "bytes_downloaded=$bytes_downloaded percent=$percent current_rate=$byte_rate average_rate=$average_rate seconds_running=$running_time seconds_left=$seconds_left url=$url out_file=$out_file";
					$next_report += $report_interval;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						line        => $status_line,
						next_report => $next_report,
					}});
					print $status_line."\n";
				}
			}
		}
		# NOTE(review): wget's exit status (available after this close) is not checked; only the 
		#               messages matched above mark the download as failed - confirm that other 
		#               server errors can't leave an empty or partial temp file behind.
		close $file_handle;
	}
	
	# We should have the file now. Move it to the final position.
	if (($failed) or (not -e $temp_file))
	{
		# Something went wrong. Unlink the temp file.
		if (-e $temp_file)
		{
			unlink $temp_file;
		}
		
		# If no specific problem was seen but the file isn't there, we still must not exit '0'.
		$failed = 9 if not $failed;
		$anvil->nice_exit({exit_code => $failed});
	}
	else
	{
		# Move it (overwrite set because we'd be dead by now if '--overwrite' wasn't used.
		my $move_failed = $anvil->Storage->move_file({
			debug       => 2,
			source_file => $temp_file, 
			target_file => $out_file, 
			overwrite   => 1,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $move_failed }});
		if (($move_failed) or (not -e $out_file))
		{
			# Something went wrong.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "err", key => "error_0060", variables => { 
				source_file => $temp_file, 
				target_file => $out_file,
			}});
			$anvil->nice_exit({exit_code => 9});
		}
	}
	
	# Set it executable if '--script' was used.
	if ($anvil->data->{switches}{script})
	{
		$anvil->Storage->change_mode({debug => 2, path => $out_file, mode => "a+x"});
	}
	
	# Tell the user that we're done.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0297", variables => { file => $out_file }});
	
	return(0);
}
|
|
|
# Placeholder for aborting another download. Nothing is implemented yet; the Anvil::Tools handle is taken 
# and '0' is returned.
sub abort_download
{
	my $anvil = shift;
	
	return 0;
}
|
|
|
# Placeholder for showing the status of any downloading, finished, failed or aborted downloads. Nothing is 
# implemented yet; the Anvil::Tools handle is taken and '0' is returned.
sub show_status
{
	my $anvil = shift;
	
	return 0;
}
|
|
|