From 529c12d2e24ce8b3d0ee70425f9be1b536af4315 Mon Sep 17 00:00:00 2001 From: Digimer Date: Thu, 21 Feb 2019 23:57:26 -0500 Subject: [PATCH] * Updated anvil-manage-files to scan all /mnt/shared/* directories to search for new files. Only files in incoming are moved though, as before. Starting work on pulling files now. * Updated the files table to add the file_mtime. In the future, if two versions of the same file exist on different machines, the one with the more recent mtime will be copied over the others. Signed-off-by: Digimer --- Anvil/Tools.pm | 1 + Anvil/Tools/Database.pm | 23 +++- share/anvil.sql | 4 + tools/anvil-manage-files | 261 +++++++++++++++++++++++++--------------- 4 files changed, 192 insertions(+), 97 deletions(-) diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index 65ccf372..303c4c08 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -957,6 +957,7 @@ sub _set_paths scan_agents => "/usr/sbin/scancore-agents", shared => { archives => "/mnt/shared/archives", + base => "/mnt/shared", definitions => "/mnt/shared/definitions", files => "/mnt/shared/files", incoming => "/mnt/shared/incoming", diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 245f883d..2e0b0d9c 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -2401,6 +2401,10 @@ This is the sum as calculated when the file is first uploaded. Once recorded, it This is the file's type/purpose. The expected values are 'iso' (disc image a new server can be installed from or mounted in a virtual optical drive), 'rpm' (a package to install on a guest that provides access to Anvil! RPM software), 'script' (pre or post migration scripts), 'image' (images to use for newly created servers, instead of installing from an ISO or PXE), or 'other'. +=head3 file_mtime (required) + +This is the file's C<< mtime >> (modification time as a unix timestamp). This is used in case a file of the same name exists on two or more systems, but their size or md5sum differ. 
The file with the most recent mtime is used to update the older versions. + =cut sub insert_or_update_files { @@ -2417,12 +2421,14 @@ sub insert_or_update_files my $file_size = defined $parameter->{file_size} ? $parameter->{file_size} : ""; my $file_md5sum = defined $parameter->{file_md5sum} ? $parameter->{file_md5sum} : ""; my $file_type = defined $parameter->{file_type} ? $parameter->{file_type} : ""; + my $file_mtime = defined $parameter->{file_mtime} ? $parameter->{file_mtime} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { file_uuid => $file_uuid, file_name => $file_name, file_size => $file_size, file_md5sum => $file_md5sum, file_type => $file_type, + file_mtime => $file_mtime, }}); if (not $file_name) @@ -2449,6 +2455,12 @@ sub insert_or_update_files $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->insert_or_update_files()", parameter => "file_type" }}); return(""); } + if (not $file_mtime) + { + # Throw an error and exit. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->insert_or_update_files()", parameter => "file_mtime" }}); + return(""); + } # If we don't have a UUID, see if we can find one for the given md5sum. if (not $file_uuid) @@ -2515,6 +2527,7 @@ INSERT INTO file_size, file_md5sum, file_type, + file_mtime, modified_date ) VALUES ( ".$anvil->data->{sys}{database}{use_handle}->quote($file_uuid).", @@ -2522,7 +2535,8 @@ INSERT INTO ".$anvil->data->{sys}{database}{use_handle}->quote($file_size).", ".$anvil->data->{sys}{database}{use_handle}->quote($file_md5sum).", ".$anvil->data->{sys}{database}{use_handle}->quote($file_type).", - ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{database}{timestamp})." 
+ ".$anvil->data->{sys}{database}{use_handle}->quote($file_mtime).", + ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{database}{timestamp})." ); "; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); @@ -2536,7 +2550,8 @@ SELECT file_name, file_size, file_md5sum, - file_type + file_type, + file_mtime FROM files WHERE @@ -2562,17 +2577,20 @@ WHERE my $old_file_size = $row->[1]; my $old_file_md5sum = $row->[2]; my $old_file_type = $row->[3]; + my $old_file_mtime = $row->[4]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_file_name => $old_file_name, old_file_size => $old_file_size, old_file_md5sum => $old_file_md5sum, old_file_type => $old_file_type, + old_file_mtime => $old_file_mtime, }}); # Anything change? if (($old_file_name ne $file_name) or ($old_file_size ne $file_size) or ($old_file_md5sum ne $file_md5sum) or + ($old_file_mtime ne $file_mtime) or ($old_file_type ne $file_type)) { # Something changed, save. @@ -2584,6 +2602,7 @@ SET file_size = ".$anvil->data->{sys}{database}{use_handle}->quote($file_size).", file_md5sum = ".$anvil->data->{sys}{database}{use_handle}->quote($file_md5sum).", file_type = ".$anvil->data->{sys}{database}{use_handle}->quote($file_type).", + file_mtime = ".$anvil->data->{sys}{database}{use_handle}->quote($file_mtime).", modified_date = ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{database}{timestamp})." WHERE file_uuid = ".$anvil->data->{sys}{database}{use_handle}->quote($file_uuid)." diff --git a/share/anvil.sql b/share/anvil.sql index cd5f2dce..4c6f737d 100644 --- a/share/anvil.sql +++ b/share/anvil.sql @@ -1105,6 +1105,7 @@ CREATE TABLE files ( file_size numeric not null, -- This is the file's size in bytes. If it recorded as a quick way to determine if a file has changed on disk. file_md5sum text not null, -- This is the sum as calculated when the file is first uploaded. 
Once recorded, it can't change. file_type text not null, -- This is the file's type/purpose. The expected values are 'iso', 'rpm', 'script', 'disk-image', or 'other'. + file_mtime numeric not null, -- If the same file exists on different machines and the md5sums/sizes differ, the one with the most recent mtime will be used to update the others. modified_date timestamp with time zone not null ); ALTER TABLE files OWNER TO admin; @@ -1116,6 +1117,7 @@ CREATE TABLE history.files ( file_size numeric, file_md5sum text, file_type text, + file_mtime numeric, modified_date timestamp with time zone not null ); ALTER TABLE history.files OWNER TO admin; @@ -1132,6 +1134,7 @@ BEGIN file_size, file_md5sum, file_type, + file_mtime, modified_date) VALUES (history_files.file_uuid, @@ -1139,6 +1142,7 @@ BEGIN history_files.file_size, history_files.file_md5sum, history_files.file_type, + history_files.file_mtime, history_files.modified_date); RETURN NULL; END; diff --git a/tools/anvil-manage-files b/tools/anvil-manage-files index 1fef8b7a..aaeaf742 100755 --- a/tools/anvil-manage-files +++ b/tools/anvil-manage-files @@ -131,7 +131,7 @@ else check_incoming($anvil); # Check for files we should have but don't yet have. - find_missing_files($anvil); + #find_missing_files($anvil); } # We're done @@ -167,33 +167,33 @@ sub find_missing_files my ($anvil) = @_; # What am I? This will impact how missing files are found. - my $query = " -SELECT - file_location_file_uuid -FROM - file_locations -WHERE - file_location_host_uuid = ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{host_uuid})." -;"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); - - my $results = $anvil->Database->query({query => $query, source => $file ? $file." 
-> ".__LINE__ : __LINE__}); - my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - results => $results, - count => $count, - }}); - foreach my $row (@{$results}) - { - my $file_location_file_uuid = $row->[0]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - file_location_file_uuid => $file_location_file_uuid, - }}); - - ### TODO: How to handle when the file with the same name exists on 2+ machines with - ### different md5sums. Use the most recent mtime? - # Read in the file details. - } +# my $query = " +# SELECT +# file_location_file_uuid +# FROM +# file_locations +# WHERE +# file_location_host_uuid = ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{host_uuid})." +# ;"; +# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); +# +# my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); +# my $count = @{$results}; +# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { +# results => $results, +# count => $count, +# }}); +# foreach my $row (@{$results}) +# { +# my $file_location_file_uuid = $row->[0]; +# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { +# file_location_file_uuid => $file_location_file_uuid, +# }}); +# +# ### TODO: How to handle when the file with the same name exists on 2+ machines with +# ### different md5sums. Use the most recent mtime? +# # Read in the file details. +# } # Read in any entries from 'file_locations'. @@ -212,56 +212,95 @@ sub check_incoming } # Read any files in '/mnt/shared/incoming'. 
$anvil->Storage->scan_directory({ - debug => 3, - directory => $anvil->data->{path}{directories}{shared}{incoming}, - recursive => 0, + debug => 2, + directory => $anvil->data->{path}{directories}{shared}{base}, + recursive => 1, }); + my $incoming_directory = $anvil->data->{path}{directories}{shared}{incoming}; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0264"}); foreach my $full_path (sort {$a cmp $b} keys %{$anvil->data->{scan}{directories}}) { - # Is this a file? + # Skip if this isn't a file. + my $file_type = $anvil->data->{scan}{directories}{$full_path}{type}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + full_path => $full_path, + file_type => $file_type, + }}); + next if $file_type ne "file"; + my $file_name = $anvil->data->{scan}{directories}{$full_path}{name}; - my $file_type = $anvil->data->{scan}{directories}{$full_path}{type}; my $file_size = $anvil->data->{scan}{directories}{$full_path}{size}; + my $file_mtime = $anvil->data->{scan}{directories}{$full_path}{mtime}; my $file_mimetype = $anvil->data->{scan}{directories}{$full_path}{mimetype}; my $file_executable = $anvil->data->{scan}{directories}{$full_path}{executable} = -x $full_path ? 
1 : 0; + my $say_mimetype = convert_mimetype($anvil, $file_mimetype, $full_path, $file_executable); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - full_path => $full_path, file_name => $file_name, - file_type => $file_type, file_size => $file_size, + file_mtime => $file_mtime, file_mimetype => $file_mimetype, file_executable => $file_executable, + say_mimetype => $say_mimetype, }}); - next if $file_type ne "file"; - # If this file is over 128 MiB, warn the user that it might take a second - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0265", variables => { file => $full_path }}); - if ($file_size > (128 * (2 ** 20))) + # Do I know about this file? If so, is the file the same size? If either is no, calculate the md5sum. + my ($file_uuid, $recorded_size, $recorded_mtime, $recorded_md5sum) = get_file_db_info($anvil, "", $file_name); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + file_uuid => $file_uuid, + recorded_size => $recorded_size, + recorded_mtime => $recorded_mtime, + recorded_md5sum => $recorded_md5sum, + }}); + + # Calculate the md5sum? + my $file_md5sum = $recorded_md5sum; + if ((not $file_uuid) or ($file_size != $recorded_size)) { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0266", variables => { - size => $anvil->Convert->bytes_to_human_readable({'bytes' => $file_size}), + # Yes. But first, do we have a size mismatch? If so, see if we need to pull a newer + # version down from elsewhere. + if (($file_uuid) && ($file_mtime <= $recorded_mtime)) + { + # We've got an older file, we need to update. + pull_file($anvil, $file_uuid, $recorded_size, $recorded_mtime, $recorded_md5sum); + + # TODO: Now see if it exists and, if it does, re-stat it. If not, loop to the + # next file and skip this one. + } + + # Now generate the md5sum. 
If this file is over 128 MiB, warn the user that it might + # take a while. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0265", variables => { file => $full_path }}); + if ($file_size > (128 * (2 ** 20))) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0266", variables => { + size => $anvil->Convert->bytes_to_human_readable({'bytes' => $file_size}), + }}); + } + + # Update (or get) the md5sum. + $file_md5sum = $anvil->Get->md5sum({debug => 2, file => $full_path}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + say_mimetype => $say_mimetype, + file_md5sum => $file_md5sum, }}); + + # Insert or update the files entry. + ($file_uuid) = $anvil->Database->insert_or_update_files({ + debug => 2, + file_uuid => $file_uuid, + file_name => $file_name, + file_size => $file_size, + file_md5sum => $file_md5sum, + file_mtime => $file_mtime, + file_type => $say_mimetype, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_uuid => $file_uuid }}); } - my $say_mimetype = convert_mimetype($anvil, $file_mimetype, $full_path, $file_executable); - my $file_md5sum = $anvil->Get->md5sum({debug => 2, file => $full_path}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - say_mimetype => $say_mimetype, - file_md5sum => $file_md5sum, - }}); - # Do we know about this file? If not, file_uuid will be blank when we call the - # insert_or_update. If we do, it will update the file name, if needed. - my ($file_uuid) = $anvil->Database->insert_or_update_files({ - debug => 2, - file_uuid => get_file_uuid($anvil, $file_md5sum, $file_name), - file_name => $file_name, - file_size => $file_size, - file_md5sum => $file_md5sum, - file_type => $say_mimetype, - }); + # If we still don't have a file UUID for some reason, skip this file. 
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_uuid => $file_uuid }}); + next if not $file_uuid; # Make sure we know about this file on this system my ($file_locatiom_uuid) = $anvil->Database->insert_or_update_file_locations({ @@ -271,31 +310,58 @@ sub check_incoming }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_locatiom_uuid => $file_locatiom_uuid }}); - # Not move it. If it's a definition file, we'll move it to - # 'path::directories::shared::definitions', otherwise we'll move it to - # 'path::directories::shared::files'. - my $target = $say_mimetype eq "definition" ? $anvil->data->{path}{directories}{shared}{definitions} : $anvil->data->{path}{directories}{shared}{files}; - $target .= "/"; - $target =~ s/\/\//\//g; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target => $target }}); - - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0268", variables => { - file => $full_path, - target => $target, + # Are we in the incoming directory? If so, move the file. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + full_path => $full_path, + incoming_directory => $incoming_directory, }}); - $anvil->Storage->move_file({ - debug => 2, - source_file => $full_path, - target_file => $target, - }); + if ($full_path =~ /^$incoming_directory/) + { + # If it's a definition file, we'll move it to + # 'path::directories::shared::definitions', otherwise we'll move it to + # 'path::directories::shared::files'. + my $target = $say_mimetype eq "definition" ? 
$anvil->data->{path}{directories}{shared}{definitions} : $anvil->data->{path}{directories}{shared}{files}; + $target .= "/"; + $target =~ s/\/\//\//g; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { target => $target }}); + + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0268", variables => { + file => $full_path, + target => $target, + }}); + $anvil->Storage->move_file({ + debug => 2, + source_file => $full_path, + target_file => $target, + }); + } } return(0); } +# This method finds a file elsewhere on the network and pulls it to here. +sub pull_file +{ + my ($anvil, $file_uuid, $recorded_size, $recorded_mtime, $recorded_md5sum) = @_; + $file_uuid = "" if not defined $file_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + file_uuid => $file_uuid, + recorded_size => $recorded_size, + recorded_mtime => $recorded_mtime, + recorded_md5sum => $recorded_md5sum, + }}); + + # Find the hosts with this file, then connect to it to see if the size is the same as what we want. + # If so, pull it down... + ### TODO + + return(0); +} + # This gets the file_uuid for a given file name and/or md5sum. If the file isn't found, an empty string is -# returned. -sub get_file_uuid +# returned for the file_uuid (with 0 for the size and mtime, and an empty md5sum). If it is found, the file_uuid, size, mtime and md5sum as recorded in the database are returned. +sub get_file_db_info { my ($anvil, $file_md5sum, $file_name) = @_; $file_md5sum = "" if not defined $file_md5sum; @@ -305,12 +371,15 @@ sub get_file_uuid file_name => $file_name, }}); - ### TODO: At some point, we'll need to deal with the possibility that the same file name with - ### different md5sums might exist in the database. + # Get the file size and file uuid, if possible. + # If I have the md5sum, search using that. If I have the filename only, then we'll fall back to that. 
my $query = " SELECT - file_uuid + file_uuid, + file_size, + file_mtime, + file_md5sum FROM files WHERE @@ -332,14 +401,20 @@ WHERE if (not $count) { # File wasn't found in the database - return(""); + return("", 0, 0, ""); } - my $file_uuid = $results->[0]->[0]; + my $file_uuid = defined $results->[0]->[0] ? $results->[0]->[0] : ""; + my $file_size = defined $results->[0]->[1] ? $results->[0]->[1] : 0; + my $file_mtime = defined $results->[0]->[2] ? $results->[0]->[2] : 0; + $file_md5sum = defined $results->[0]->[3] ? $results->[0]->[3] : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - file_uuid => $file_uuid, + file_uuid => $file_uuid, + file_size => $file_size, + file_mtime => $file_mtime, + file_md5sum => $file_md5sum, }}); - return($file_uuid); + return($file_uuid, $file_size, $file_mtime, $file_md5sum); } # This handles toggling a file to marked or unmarked as a script. @@ -359,8 +434,8 @@ sub handle_script $anvil->nice_exit({exit_code => 3}); } - # Find the file_uuid. - my ($file_uuid) = get_file_uuid($anvil, "", $anvil->data->{switches}{file}); + # Find the file_uuid (we don't actually care about the file size, mtime or md5sum). + my ($file_uuid, $file_size, $recorded_mtime, $file_md5sum) = get_file_db_info($anvil, "", $anvil->data->{switches}{file}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_uuid => $file_uuid }}); # Toggle the executable bits. 
@@ -403,7 +478,8 @@ SELECT file_name, file_size, file_md5sum, - file_type + file_type, + file_mtime FROM files WHERE @@ -417,11 +493,13 @@ WHERE my $file_size = $results->[0]->[1]; my $file_md5sum = $results->[0]->[2]; my $file_type = $results->[0]->[3]; + my $file_mtime = $results->[0]->[4]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_name => $file_name, file_size => $file_size, file_md5sum => $file_md5sum, file_type => $file_type, + file_mtime => $file_mtime, }}); if (($file_type eq "script") && (not $anvil->data->{switches}{'is-script'})) { @@ -429,23 +507,15 @@ WHERE my $mimetype = mimetype($file_name); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mimetype => $mimetype }}); - my $say_mimetype = convert_mimetype($anvil, $mimetype, $file_name, $executable); + my $say_mimetype = convert_mimetype($anvil, $mimetype, $file_name, 0); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_mimetype => $say_mimetype }}); - $anvil->Database->insert_or_update_files({ - file_uuid => $file_uuid, - file_name => $anvil->data->{switches}{file}, - file_size => $file_size, - file_md5sum => $file_md5sum, - file_type => $say_mimetype, - }); - - # Change the file tpye to "say_mimetype". $anvil->Database->insert_or_update_files({ debug => 2, file_uuid => $file_uuid, file_name => $anvil->data->{switches}{file}, file_size => $file_size, file_md5sum => $file_md5sum, + file_mtime => $file_mtime, file_type => $say_mimetype, }); } @@ -458,6 +528,7 @@ WHERE file_name => $anvil->data->{switches}{file}, file_size => $file_size, file_md5sum => $file_md5sum, + file_mtime => $file_mtime, file_type => "script", }); }