#!/usr/bin/perl
# 
# This takes a URL (ftp, http or https) and downloads the file. If it is called without --url, it shows the 
# progress of any other instances currently downloading files.
# 
# Return codes:
# 0  = Normal exit.
# 1  = No database connections available
# 2  = The requested URL was not found on the remote server.
# 3  = The requested URL does not resolve to a known domain.
# 4  = The requested URL failed because the remote host refused the connection.
# 5  = The requested URL failed because there is no route to that host.
# 6  = Abort requested, but UUID or PID not passed
# 7  = The requested URL failed because the network is unreachable.
# 8  = The file to download already exists.
# 9  = Something went wrong moving the file from temp to the output directory.
# 10 = The requested URL does not use a supported protocol (ftp, http or https).
# 11 = The --job-uuid is invalid
# 12 = The --job-uuid is already being handled by another process
# 
# 
# TODO: 
# - 
# 
# NOTE:
# - 
# 

use strict;
use warnings;
use Anvil::Tools;
use Data::Dumper;

# Disable buffering
$| = 1;

# Work out the program's name and the directory it was started from. When the program is invoked without
# a path component (for example 'perl <file>'), $0 contains no '/', so neither pattern matches. Fall back
# to $0 itself and to '.' so that later uses of these variables never see an undefined value.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
   $THIS_FILE         = $0 if not defined $THIS_FILE;
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
   $running_directory = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Turn a relative starting directory into an absolute one.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});

$anvil->Database->connect;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "error_0003"});
	$anvil->nice_exit({exit_code => 1});
}

# Set the switch defaults before reading the command line so that every switch is defined.
$anvil->data->{switches}{abort}      = "";
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{overwrite}  = "";
$anvil->data->{switches}{'save-to'}  = "";	# /mnt/shared/files by default
$anvil->data->{switches}{script}     = "";
$anvil->data->{switches}{url}        = "";
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
	'switches::abort'     => $anvil->data->{switches}{abort},
	'switches::job-uuid'  => $anvil->data->{switches}{'job-uuid'},
	'switches::overwrite' => $anvil->data->{switches}{overwrite},
	'switches::save-to'   => $anvil->data->{switches}{'save-to'},
	'switches::script'    => $anvil->data->{switches}{script},
	'switches::url'       => $anvil->data->{switches}{url},
}});

# If I don't have --abort or --url, see if there is a job waiting. A job's data can populate the
# switches (including 'url'), so this has to run before the dispatch below.
if ((not $anvil->data->{switches}{abort}) && (not $anvil->data->{switches}{url}))
{
	get_job_details($anvil);
}

# Do what now?
if ($anvil->data->{switches}{abort})
{
	# Kill the other download
	abort_download($anvil);
}
elsif ($anvil->data->{switches}{url})
{
	# Try to download the file
	download_file($anvil);
}
else
{
	# Show the status of any downloading, finished, failed or aborted downloads.
	show_status($anvil);
}

# We're done
$anvil->nice_exit({exit_code => 0});


#############################################################################################################
# Private functions.                                                                                        #
#############################################################################################################

# This loads a job's details, or looks for unclaimed jobs that aren't finished.
sub get_job_details
{
	# Parameters:
	#   $anvil - The Anvil::Tools object created at start-up.
	# 
	# If --job-uuid was not given, this asks for a pending job for this program. When a job UUID is 
	# known, the job's details are loaded and its 'job_data' (comma-separated 'variable=value' pairs) is
	# copied into the switches hash. Exits with code 11 if the job UUID is bad, or 12 if another process
	# is already handling the job. Returns 0 otherwise.
	my ($anvil) = @_;
	
	# If I don't have a job-uuid, see if any jobs are pending
	if (not $anvil->data->{switches}{'job-uuid'})
	{
		my $job_uuid = $anvil->Job->get_job_uuid({debug => 2, program => $THIS_FILE});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
		if ($anvil->Validate->uuid({uuid => $job_uuid}))
		{
			# Got one!
			$anvil->data->{switches}{'job-uuid'} = $job_uuid;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'} }});
		}
	}
	
	# If we've got a job-uuid, load the details.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'} }});
	if ($anvil->data->{switches}{'job-uuid'})
	{
		my $problem = $anvil->Job->get_job_details({
			debug => 2,
			check => 1,
		});
		
		# If 'problem' is '1', the job-uuid is bad. If it's '2', it's already being handled by 
		# another process. In either case, we're not going to handle this.
		if (not $problem)
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				problem                  => $problem,
				"jobs::job_uuid"         => $anvil->data->{jobs}{job_uuid},
				"jobs::job_host_uuid"    => $anvil->data->{jobs}{job_host_uuid},
				"jobs::job_command"      => $anvil->data->{jobs}{job_command},
				"jobs::job_data"         => $anvil->data->{jobs}{job_data},
				"jobs::job_picked_up_by" => $anvil->data->{jobs}{job_picked_up_by},
				"jobs::job_picked_up_at" => $anvil->data->{jobs}{job_picked_up_at},
				"jobs::job_updated"      => $anvil->data->{jobs}{job_updated},
				"jobs::job_name"         => $anvil->data->{jobs}{job_name},
				"jobs::job_progress"     => $anvil->data->{jobs}{job_progress},
				"jobs::job_title"        => $anvil->data->{jobs}{job_title},
				"jobs::job_description"  => $anvil->data->{jobs}{job_description},
				"jobs::job_status"       => $anvil->data->{jobs}{job_status},
			}});
			
			# Store the job-uuid in the switches hash.
			$anvil->data->{switches}{'job-uuid'} = $anvil->data->{jobs}{job_uuid};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }});
		}
		elsif ($problem eq "1")
		{
			# Bad UUID
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0061", variables => { job_uuid => $anvil->data->{jobs}{job_uuid} }});
			$anvil->nice_exit({exit_code => 11});
		}
		else
		{
			# Job is already being handled by another active process.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0062", variables => { job_uuid => $anvil->data->{jobs}{job_uuid} }});
			$anvil->nice_exit({exit_code => 12});
		}
		
		# Pull apart the job-data and feed them into the switches hash
		if ($anvil->data->{switches}{'job-uuid'})
		{
			# Guard against a job with no data at all so 'split' never sees an undefined value.
			my $job_data = defined $anvil->data->{jobs}{job_data} ? $anvil->data->{jobs}{job_data} : "";
			foreach my $pair (split/,/, $job_data)
			{
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pair => $pair }});
				my ($variable, $value) = ($pair =~ /^(.*?)=(.*)$/);
				
				# A pair without an '=' (or with nothing before it) can't name a switch. 
				# Skip it instead of storing a value under an undefined or empty key.
				next if ((not defined $variable) or ($variable eq ""));
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					variable => $variable,
					value    => $value,
				}});
				
				$anvil->data->{switches}{$variable} = $value;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::${variable}" => $anvil->data->{switches}{$variable} }});
			}
		}
		
		# If "job_picked_up_by" is set, clear it. (It won't be running because we did 'check' 
		# earlier)
		if ($anvil->data->{jobs}{job_picked_up_by})
		{
			$anvil->Job->clear({
				debug    => 2,
				job_uuid => $anvil->data->{switches}{'job-uuid'},
			});
		}
		
		# Mark it as 1% done.
		# NOTE(review): The original call passed no progress value at all. 'progress' is assumed to 
		#               be the parameter name that Job->update_progress() accepts; confirm.
		$anvil->Job->update_progress({
			debug    => 2,
			progress => 1,
			job_uuid => $anvil->data->{switches}{'job-uuid'},
		});
	}
	
	return(0);
}

# This wraps a string in single quotes so that it is passed to the shell as one literal word. Embedded 
# single quotes are closed, escaped and re-opened ('\''). An undefined value becomes an empty word.
sub quote_for_shell
{
	my ($string) = @_;
	
	$string =  "" if not defined $string;
	$string =~ s/'/'\\''/g;
	
	return("'".$string."'");
}

# This downloads the file at 'switches::url' into the shared temp directory using wget, printing a status
# line to STDOUT at most every five seconds, then moves the file into the output directory.
# 
# Parameters:
#   $anvil - The Anvil::Tools object created at start-up.
# 
# Switches read: 'url', 'save-to', 'overwrite' and 'script'.
# 
# Returns 0 on success. On failure, this exits the program with:
#   2, 3, 4, 5 or 7 - wget reported the matching network/HTTP error (see the top of the file).
#   8               - The output file already exists and --overwrite was not used.
#   9               - The download produced no file, or the file could not be moved into place.
#   10              - The URL is not an ftp, http or https URL.
sub download_file
{
	my ($anvil) = @_;
	
	my $failed = 0;
	my $url    = $anvil->data->{switches}{url};
	
	# Is this a supported protocol? This is checked before anything is derived from the URL so that the
	# file name pattern below is guaranteed to match (a supported URL always contains a '/').
	if (($url !~ /^ftp\:\/\//) && ($url !~ /^http\:\/\//) && ($url !~ /^https\:\/\//))
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0286", variables => { url => $url }});
		$anvil->nice_exit({exit_code => 10});
	}
	
	# NOTE(review): A URL ending in '/' gives an empty file name, and a query string is kept as part of 
	#               the name. Neither case is handled here.
	my $file_name  = ($url =~ /^.*\/(.*)$/)[0];
	my $temp_file  = $anvil->data->{path}{directories}{shared}{temp}."/".$file_name;
	my $save_to    = $anvil->data->{switches}{'save-to'} ? $anvil->data->{switches}{'save-to'} : $anvil->data->{path}{directories}{shared}{files};
	my $out_file   = $save_to."/".$file_name;
	my $unix_start = time;
	
	# Collapse repeated slashes (ie: when --save-to ends in '/') into a single slash. The slashes must be
	# replaced by one slash, not removed, or neighbouring path components would be fused together.
	$temp_file =~ s/\/+/\//g;
	$save_to   =~ s/\/+/\//g;
	$out_file  =~ s/\/+/\//g;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		url        => $url,
		file_name  => $file_name,
		temp_file  => $temp_file,
		unix_start => $unix_start,
		save_to    => $save_to,
		out_file   => $out_file,
	}});
	
	# If the target file exists, exit.
	if ((-e $out_file) && (not $anvil->data->{switches}{overwrite}))
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => "err", key => "error_0059", variables => { file => $out_file }});
		$anvil->nice_exit({exit_code => 8});
	}
	
	# Make sure the output and temp directories exist. The results are only logged; if a directory 
	# could not be created, the download or the move below will fail and be reported there.
	foreach my $directory ($save_to, $anvil->data->{path}{directories}{shared}{temp})
	{
		next if -e $directory;
		my $directory_failed = $anvil->Storage->make_directory({
			debug     => 2,
			directory => $directory,
			mode      => "0777",
			user      => "apache",
			group     => "apache",
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			directory => $directory,
			failed    => $directory_failed,
		}});
	}
	
	### NOTE: We don't use System->call because we need to track the output in real time.
	# Try to download it.
	my $bytes_downloaded = 0;
	my $downloaded       = 0;	# Kibibytes, as reported by wget
	my $percent          = 0;
	my $rate             = 0;	# Human-readable rate, as reported by wget
	my $time_left        = 0;	# Human-readable time, as reported by wget
	my $running_time     = 0;
	my $average_rate     = 0;
	my $report_interval  = 5;	# Seconds between status file update
	my $next_report      = time + $report_interval;
	
	# The URL comes from the command line or from a job's data, so it and the temp file name derived 
	# from it are quoted before they are handed to the shell.
	my $shell_call = $anvil->data->{path}{exe}{wget}." --continue --progress=dot:binary ".quote_for_shell($url)." --output-document ".quote_for_shell($temp_file);
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		next_report => $next_report,
		shell_call  => $shell_call,
	}});
	
	# wget writes its progress to STDERR, so it is merged into the pipe we read from.
	my $file_handle;
	if (not open($file_handle, "-|", $shell_call." 2>&1"))
	{
		# Nothing can be read. The missing temp file is caught and reported by the check below.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, priority => "err", key => "log_0014", variables => { shell_call => $shell_call, error => $! }});
	}
	else
	{
		while (my $raw_line = <$file_handle>)
		{
			chomp $raw_line;
			my $line = $anvil->Words->clean_spaces({string => $raw_line});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0017", variables => { line => $line }});
			
			# Check for problems
			if (($line =~ /404/) && ($line =~ /Not Found/i))
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0287", variables => { url => $url }});
				$failed = 2;
			}
			elsif ($line =~ /Name or service not known/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0288", variables => { url => $url }});
				$failed = 3;
			}
			elsif ($line =~ /Connection refused/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0289", variables => { url => $url }});
				$failed = 4;
			}
			elsif ($line =~ /route to host/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0290", variables => { url => $url }});
				$failed = 5;
			}
			elsif ($line =~ /Network is unreachable/i)
			{
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0291", variables => { url => $url }});
				$failed = 7;
			}
			elsif ($line =~ /^(\d+)K .*? (\d+)% (.*?) (\d+.*)$/)
			{
				# This is a progress line; <downloaded>K <dots> <percent>% <rate> <time left>
				$downloaded = $1;
				$percent    = $2;
				$rate       = $3;
				$time_left  = $4;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					downloaded => $downloaded,
					percent    => $percent,
					rate       => $rate,
					time_left  => $time_left,
				}});
				
				### NOTE: According to: http://savannah.gnu.org/bugs/index.php?22765, wget uses base-2.
				# Convert
				   $bytes_downloaded = $downloaded * 1024;
				my $say_downloaded   = $anvil->Convert->bytes_to_human_readable({debug => 3, 'bytes' => $bytes_downloaded});
				my $say_percent      = $percent."%";
				my $byte_rate        = $anvil->Convert->human_readable_to_bytes({debug => 3, size => $rate, base2 => 1});
				my $say_rate         = $anvil->Convert->bytes_to_human_readable({debug => 3, 'bytes' => $byte_rate})."/s";
				   $running_time     = time - $unix_start;
				my $say_running_time = $anvil->Convert->time({debug => 3, 'time' => $running_time, translate => 1});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					bytes_downloaded => $bytes_downloaded,
					say_downloaded   => $say_downloaded,
					say_percent      => $say_percent,
					byte_rate        => $byte_rate,
					say_rate         => $say_rate,
					running_time     => $running_time,
					say_running_time => $say_running_time,
				}});
				
				# Time left is a bit more complicated; it is a run of <number><unit> fields
				# where the unit is 'd', 'h', 'm' or 's'.
				my $days    = 0;
				my $hours   = 0;
				my $minutes = 0;
				my $seconds = 0;
				if ($time_left =~ /(\d+)d/)
				{
					$days = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { days => $days }});
				}
				if ($time_left =~ /(\d+)h/)
				{
					$hours = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { hours => $hours }});
				}
				if ($time_left =~ /(\d+)m/)
				{
					$minutes = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { minutes => $minutes }});
				}
				if ($time_left =~ /(\d+)s/)
				{
					$seconds = $1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { seconds => $seconds }});
				}
				my $seconds_left  = (($days * 86400) + ($hours * 3600) + ($minutes * 60) + $seconds);
				my $say_time_left = $anvil->Convert->time({debug => 3, 'time' => $seconds_left, suffix => "long", translate => 1});
				
				# Avoid a divide-by-zero in the first second of the download.
				   $running_time     = 1 if not $running_time;
				   $average_rate     = int($bytes_downloaded / $running_time);
				my $say_average_rate = $anvil->Convert->bytes_to_human_readable({debug => 3, 'bytes' => $average_rate})."/s";
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					seconds_left     => $seconds_left,
					say_time_left    => $say_time_left,
					running_time     => $running_time,
					average_rate     => $average_rate,
					say_average_rate => $say_average_rate,
				}});
				
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					'time'      => time,
					next_report => $next_report,
				}});
				if (time > $next_report)
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						say_downloaded   => $say_downloaded,
						percent          => $percent,
						say_rate         => $say_rate,
						running_time     => $running_time,
						say_running_time => $say_running_time,
						seconds_left     => $seconds_left,
						say_time_left    => $say_time_left,
						say_percent      => $say_percent,
						say_average_rate => $say_average_rate,
						average_rate     => $average_rate,
					}});
					
					# This is the machine-readable status line printed to STDOUT.
					my $status_line =  "bytes_downloaded=$bytes_downloaded percent=$percent current_rate=$byte_rate average_rate=$average_rate seconds_running=$running_time seconds_left=$seconds_left url=$url out_file=$out_file";
					   $next_report += $report_interval;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						line        => $status_line,
						next_report => $next_report,
					}});
					print $status_line."\n";
				}
			}
		}
		close $file_handle;
	}
	
	# We should have the file now. Move it to the final position.
	if (($failed) or (not -e $temp_file))
	{
		# Something went wrong. Unlink the temp file.
		if (-e $temp_file)
		{
			unlink $temp_file;
		}
		
		# If no specific error was recognised but the file is missing, there is nothing to move into
		# place. Use exit code 9 so that a failed download is never reported as a success (0).
		my $exit_code = $failed ? $failed : 9;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { exit_code => $exit_code }});
		$anvil->nice_exit({exit_code => $exit_code});
	}
	else
	{
		# Move it (overwrite set because we'd be dead by now if '--overwrite' wasn't used.
		my $move_failed = $anvil->Storage->move_file({
			debug       => 2,
			source_file => $temp_file,
			target_file => $save_to,
			overwrite   => 1,
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { failed => $move_failed }});
		
		if (($move_failed) or (not -e $out_file))
		{
			# Something went wrong. Exit code 9 is the documented code for a failed move (8 is 
			# reserved for "the file already exists").
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "err", key => "error_0060", variables => { 
				source_file => $temp_file,
				target_file => $save_to,
			}});
			$anvil->nice_exit({exit_code => 9});
		}
	}
	
	# Set it executable if '--script' was used.
	if ($anvil->data->{switches}{script})
	{
		$anvil->Storage->change_mode({debug => 2, path => $out_file, mode => "a+x"});
	}
	
	# Tell the user that we're done.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0297", variables => { file => $out_file }});
	
	return(0);
}

# This is called when --abort is used. It is a placeholder; it currently does nothing and returns 0.
sub abort_download
{
	my ($anvil) = @_;
	
	return(0);
}

# Show the status of any downloading, finished, failed or aborted downloads. This is a placeholder; it 
# currently does nothing and returns 0.
sub show_status
{
	my ($anvil) = @_;
	
	return(0);
}