* Updated anvil-manage-files to scan all /mnt/shared/* directories to search for new files. Only files in incoming are moved though, as before. Starting work on pulling files now.

* Updated the files table to add the file_mtime column. In the future, if two versions of the same file exist on different machines, the one with the more recent mtime will be copied over the others.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 6 years ago
parent d9e9884e53
commit 529c12d2e2
  1. 1
      Anvil/Tools.pm
  2. 21
      Anvil/Tools/Database.pm
  3. 4
      share/anvil.sql
  4. 201
      tools/anvil-manage-files

@ -957,6 +957,7 @@ sub _set_paths
scan_agents => "/usr/sbin/scancore-agents",
shared => {
archives => "/mnt/shared/archives",
base => "/mnt/shared",
definitions => "/mnt/shared/definitions",
files => "/mnt/shared/files",
incoming => "/mnt/shared/incoming",

@ -2401,6 +2401,10 @@ This is the sum as calculated when the file is first uploaded. Once recorded, it
This is the file's type/purpose. The expected values are 'iso' (disc image a new server can be installed from or mounted in a virtual optical drive), 'rpm' (a package to install on a guest that provides access to Anvil! RPM software), 'script' (pre or post migration scripts), 'image' (images to use for newly created servers, instead of installing from an ISO or PXE), or 'other'.
=head3 file_mtime (required)
This is the file's C<< mtime >> (modification time as a unix timestamp). This is used in case a file of the same name exists on two or more systems, but their size or md5sum differ. The file with the most recent mtime is used to update the older versions.
=cut
sub insert_or_update_files
{
@ -2417,12 +2421,14 @@ sub insert_or_update_files
my $file_size = defined $parameter->{file_size} ? $parameter->{file_size} : "";
my $file_md5sum = defined $parameter->{file_md5sum} ? $parameter->{file_md5sum} : "";
my $file_type = defined $parameter->{file_type} ? $parameter->{file_type} : "";
my $file_mtime = defined $parameter->{file_mtime} ? $parameter->{file_mtime} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
file_uuid => $file_uuid,
file_name => $file_name,
file_size => $file_size,
file_md5sum => $file_md5sum,
file_type => $file_type,
file_mtime => $file_mtime,
}});
if (not $file_name)
@ -2449,6 +2455,12 @@ sub insert_or_update_files
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->insert_or_update_files()", parameter => "file_type" }});
return("");
}
if (not $file_mtime)
{
# Throw an error and exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Database->insert_or_update_files()", parameter => "file_mtime" }});
return("");
}
# If we don't have a UUID, see if we can find one for the given md5sum.
if (not $file_uuid)
@ -2515,6 +2527,7 @@ INSERT INTO
file_size,
file_md5sum,
file_type,
file_mtime,
modified_date
) VALUES (
".$anvil->data->{sys}{database}{use_handle}->quote($file_uuid).",
@ -2522,6 +2535,7 @@ INSERT INTO
".$anvil->data->{sys}{database}{use_handle}->quote($file_size).",
".$anvil->data->{sys}{database}{use_handle}->quote($file_md5sum).",
".$anvil->data->{sys}{database}{use_handle}->quote($file_type).",
".$anvil->data->{sys}{database}{use_handle}->quote($file_mtime).",
".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{database}{timestamp})."
);
";
@ -2536,7 +2550,8 @@ SELECT
file_name,
file_size,
file_md5sum,
file_type
file_type,
file_mtime
FROM
files
WHERE
@ -2562,17 +2577,20 @@ WHERE
my $old_file_size = $row->[1];
my $old_file_md5sum = $row->[2];
my $old_file_type = $row->[3];
my $old_file_mtime = $row->[4];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
old_file_name => $old_file_name,
old_file_size => $old_file_size,
old_file_md5sum => $old_file_md5sum,
old_file_type => $old_file_type,
old_file_mtime => $old_file_mtime,
}});
# Anything change?
if (($old_file_name ne $file_name) or
($old_file_size ne $file_size) or
($old_file_md5sum ne $file_md5sum) or
($old_file_mtime ne $file_mtime) or
($old_file_type ne $file_type))
{
# Something changed, save.
@ -2584,6 +2602,7 @@ SET
file_size = ".$anvil->data->{sys}{database}{use_handle}->quote($file_size).",
file_md5sum = ".$anvil->data->{sys}{database}{use_handle}->quote($file_md5sum).",
file_type = ".$anvil->data->{sys}{database}{use_handle}->quote($file_type).",
file_mtime = ".$anvil->data->{sys}{database}{use_handle}->quote($file_mtime).",
modified_date = ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{database}{timestamp})."
WHERE
file_uuid = ".$anvil->data->{sys}{database}{use_handle}->quote($file_uuid)."

@ -1105,6 +1105,7 @@ CREATE TABLE files (
file_size numeric not null, -- This is the file's size in bytes. It is recorded as a quick way to determine if a file has changed on disk.
file_md5sum text not null, -- This is the sum as calculated when the file is first uploaded. Once recorded, it can't change.
file_type text not null, -- This is the file's type/purpose. The expected values are 'iso', 'rpm', 'script', 'disk-image', or 'other'.
file_mtime numeric not null, -- If the same file exists on different machines with differing md5sums/sizes, the one with the most recent mtime will be used to update the others.
modified_date timestamp with time zone not null
);
ALTER TABLE files OWNER TO admin;
@ -1116,6 +1117,7 @@ CREATE TABLE history.files (
file_size numeric,
file_md5sum text,
file_type text,
file_mtime numeric,
modified_date timestamp with time zone not null
);
ALTER TABLE history.files OWNER TO admin;
@ -1132,6 +1134,7 @@ BEGIN
file_size,
file_md5sum,
file_type,
file_mtime,
modified_date)
VALUES
(history_files.file_uuid,
@ -1139,6 +1142,7 @@ BEGIN
history_files.file_size,
history_files.file_md5sum,
history_files.file_type,
history_files.file_mtime,
history_files.modified_date);
RETURN NULL;
END;

@ -131,7 +131,7 @@ else
check_incoming($anvil);
# Check for files we should have but don't yet have.
find_missing_files($anvil);
#find_missing_files($anvil);
}
# We're done
@ -167,33 +167,33 @@ sub find_missing_files
my ($anvil) = @_;
# What am I? This will impact how missing files are found.
my $query = "
SELECT
file_location_file_uuid
FROM
file_locations
WHERE
file_location_host_uuid = ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{host_uuid})."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $file ? $file." -> ".$THIS_FILE : $THIS_FILE, line => $line ? $line." -> ".__LINE__ : __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $file_location_file_uuid = $row->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
file_location_file_uuid => $file_location_file_uuid,
}});
### TODO: How to handle when the file with the same name exists on 2+ machines with
### different md5sums. Use the most recent mtime?
# Read in the file details.
}
# my $query = "
# SELECT
# file_location_file_uuid
# FROM
# file_locations
# WHERE
# file_location_host_uuid = ".$anvil->data->{sys}{database}{use_handle}->quote($anvil->data->{sys}{host_uuid})."
# ;";
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
#
# my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
# my $count = @{$results};
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
# results => $results,
# count => $count,
# }});
# foreach my $row (@{$results})
# {
# my $file_location_file_uuid = $row->[0];
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
# file_location_file_uuid => $file_location_file_uuid,
# }});
#
# ### TODO: How to handle when the file with the same name exists on 2+ machines with
# ### different md5sums. Use the most recent mtime?
# # Read in the file details.
# }
# Read in any entries from 'file_locations'.
@ -212,31 +212,64 @@ sub check_incoming
}
# Read any files in '/mnt/shared/incoming'.
$anvil->Storage->scan_directory({
debug => 3,
directory => $anvil->data->{path}{directories}{shared}{incoming},
recursive => 0,
debug => 2,
directory => $anvil->data->{path}{directories}{shared}{base},
recursive => 1,
});
my $incoming_directory = $anvil->data->{path}{directories}{shared}{incoming};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0264"});
foreach my $full_path (sort {$a cmp $b} keys %{$anvil->data->{scan}{directories}})
{
# Is this a file?
my $file_name = $anvil->data->{scan}{directories}{$full_path}{name};
# Skip if this isn't a file.
my $file_type = $anvil->data->{scan}{directories}{$full_path}{type};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
full_path => $full_path,
file_type => $file_type,
}});
next if $file_type ne "file";
my $file_name = $anvil->data->{scan}{directories}{$full_path}{name};
my $file_size = $anvil->data->{scan}{directories}{$full_path}{size};
my $file_mtime = $anvil->data->{scan}{directories}{$full_path}{mtime};
my $file_mimetype = $anvil->data->{scan}{directories}{$full_path}{mimetype};
my $file_executable = $anvil->data->{scan}{directories}{$full_path}{executable} = -x $full_path ? 1 : 0;
my $say_mimetype = convert_mimetype($anvil, $file_mimetype, $full_path, $file_executable);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
full_path => $full_path,
file_name => $file_name,
file_type => $file_type,
file_size => $file_size,
file_mtime => $file_mtime,
file_mimetype => $file_mimetype,
file_executable => $file_executable,
say_mimetype => $say_mimetype,
}});
next if $file_type ne "file";
# If this file is over 128 MiB, warn the user that it might take a second
# Do I know about this file? If so, is the file the same size? If either is no, calculate the md5sum.
my ($file_uuid, $recorded_size, $recorded_mtime, $recorded_md5sum) = get_file_db_info($anvil, "", $file_name);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
file_uuid => $file_uuid,
recorded_size => $recorded_size,
recorded_mtime => $recorded_mtime,
recorded_md5sum => $recorded_md5sum,
}});
# Calculate the md5sum?
my $file_md5sum = $recorded_md5sum;
if ((not $file_uuid) or ($file_size != $recorded_size))
{
# Yes. But first, do we have a size mismatch? If so, see if we need to pull a newer
# version down from elsewhere.
if (($file_uuid) && ($file_mtime <= $recorded_mtime))
{
# We've got an older file, we need to update.
pull_file($anvil, $file_uuid, $recorded_size, $recorded_mtime, $recorded_md5sum);
# TODO: Now see if it exists and, if it does, re-stat it. If not, loop to the
# next file and skip this one.
}
# Now generate the md5sum. If this file is over 128 MiB, warn the user that it might
# take a while.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0265", variables => { file => $full_path }});
if ($file_size > (128 * (2 ** 20)))
{
@ -244,24 +277,30 @@ sub check_incoming
size => $anvil->Convert->bytes_to_human_readable({'bytes' => $file_size}),
}});
}
my $say_mimetype = convert_mimetype($anvil, $file_mimetype, $full_path, $file_executable);
my $file_md5sum = $anvil->Get->md5sum({debug => 2, file => $full_path});
# Update (or get) the md5sum.
$file_md5sum = $anvil->Get->md5sum({debug => 2, file => $full_path});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
say_mimetype => $say_mimetype,
file_md5sum => $file_md5sum,
}});
# Do we know about this file? If not, file_uuid will be blank when we call the
# insert_or_update. If we do, it will update the file name, if needed.
my ($file_uuid) = $anvil->Database->insert_or_update_files({
# Insert or update the files entry.
($file_uuid) = $anvil->Database->insert_or_update_files({
debug => 2,
file_uuid => get_file_uuid($anvil, $file_md5sum, $file_name),
file_uuid => $file_uuid,
file_name => $file_name,
file_size => $file_size,
file_md5sum => $file_md5sum,
file_mtime => $file_mtime,
file_type => $say_mimetype,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_uuid => $file_uuid }});
}
# If we still don't have a file UUID for some reason, skip this file.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_uuid => $file_uuid }});
next if not $file_uuid;
# Make sure we know about this file on this system
my ($file_locatiom_uuid) = $anvil->Database->insert_or_update_file_locations({
@ -271,7 +310,14 @@ sub check_incoming
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_locatiom_uuid => $file_locatiom_uuid }});
# Not move it. If it's a definition file, we'll move it to
# Are we in the incoming directory? If so, move the file.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
full_path => $full_path,
incoming_directory => $incoming_directory,
}});
if ($full_path =~ /^$incoming_directory/)
{
# If it's a definition file, we'll move it to
# 'path::directories::shared::definitions', otherwise we'll move it to
# 'path::directories::shared::files'.
my $target = $say_mimetype eq "definition" ? $anvil->data->{path}{directories}{shared}{definitions} : $anvil->data->{path}{directories}{shared}{files};
@ -289,13 +335,33 @@ sub check_incoming
target_file => $target,
});
}
}
return(0);
}
# Placeholder for locating a file on another host and pulling it to this machine. At present it only
# logs what it was asked to fetch; the actual transfer is still to be written.
#
# Positional parameters:
#   $anvil           - The caller's Anvil object; used here for logging only.
#   $file_uuid       - UUID of the file to pull. An undefined value is treated as an empty string.
#   $recorded_size   - The file size passed in by the caller.
#   $recorded_mtime  - The file mtime passed in by the caller.
#   $recorded_md5sum - The file md5sum passed in by the caller.
#
# Always returns '0'.
sub pull_file
{
	my $anvil           = shift;
	my $file_uuid       = shift;
	my $recorded_size   = shift;
	my $recorded_mtime  = shift;
	my $recorded_md5sum = shift;
	
	# Normalize a missing UUID so it can be logged safely.
	if (not defined $file_uuid)
	{
		$file_uuid = "";
	}
	
	# Record what we were asked to pull.
	my $log_values = {
		file_uuid       => $file_uuid,
		recorded_size   => $recorded_size,
		recorded_mtime  => $recorded_mtime,
		recorded_md5sum => $recorded_md5sum,
	};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $log_values});
	
	### TODO: Find the hosts that have this file, connect to each to see if the size there matches what
	###       we want and, if so, pull it down.
	
	return(0);
}
# This gets the file_uuid for a given file name and/or md5sum. If the file isn't found, an empty string is
# returned.
sub get_file_uuid
# returned. If it is found, the file's UUID, size, mtime and md5sum as recorded in the database are returned.
sub get_file_db_info
{
my ($anvil, $file_md5sum, $file_name) = @_;
$file_md5sum = "" if not defined $file_md5sum;
@ -305,12 +371,15 @@ sub get_file_uuid
file_name => $file_name,
}});
### TODO: At some point, we'll need to deal with the possibility that the same file name with
### different md5sums might exist in the database.
# Get the file size and file uuid, if possible.
# If I have the md5sum, search using that. If I have the filename only, then we'll fall back to that.
my $query = "
SELECT
file_uuid
file_uuid,
file_size,
file_mtime,
file_md5sum
FROM
files
WHERE
@ -332,14 +401,20 @@ WHERE
if (not $count)
{
# File wasn't found in the database
return("");
return("", 0, 0, "");
}
my $file_uuid = $results->[0]->[0];
my $file_uuid = defined $results->[0]->[0] ? $results->[0]->[0] : "";
my $file_size = defined $results->[0]->[1] ? $results->[0]->[1] : 0;
my $file_mtime = defined $results->[0]->[2] ? $results->[0]->[2] : 0;
$file_md5sum = defined $results->[0]->[3] ? $results->[0]->[3] : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
file_uuid => $file_uuid,
file_size => $file_size,
file_mtime => $file_mtime,
file_md5sum => $file_md5sum,
}});
return($file_uuid);
return($file_uuid, $file_size, $file_mtime, $file_md5sum);
}
# This handles toggling a file to marked or unmarked as a script.
@ -359,8 +434,8 @@ sub handle_script
$anvil->nice_exit({exit_code => 3});
}
# Find the file_uuid.
my ($file_uuid) = get_file_uuid($anvil, "", $anvil->data->{switches}{file});
# Find the file_uuid (we don't actually care about the file size, mtime or md5sum).
my ($file_uuid, $file_size, $recorded_mtime, $file_md5sum) = get_file_db_info($anvil, "", $anvil->data->{switches}{file});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { file_uuid => $file_uuid }});
# Toggle the executable bits.
@ -403,7 +478,8 @@ SELECT
file_name,
file_size,
file_md5sum,
file_type
file_type,
file_mtime
FROM
files
WHERE
@ -417,11 +493,13 @@ WHERE
my $file_size = $results->[0]->[1];
my $file_md5sum = $results->[0]->[2];
my $file_type = $results->[0]->[3];
my $file_mtime = $results->[0]->[4];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
file_name => $file_name,
file_size => $file_size,
file_md5sum => $file_md5sum,
file_type => $file_type,
file_mtime => $file_mtime,
}});
if (($file_type eq "script") && (not $anvil->data->{switches}{'is-script'}))
{
@ -429,23 +507,15 @@ WHERE
my $mimetype = mimetype($file_name);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mimetype => $mimetype }});
my $say_mimetype = convert_mimetype($anvil, $mimetype, $file_name, $executable);
my $say_mimetype = convert_mimetype($anvil, $mimetype, $file_name, 0);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { say_mimetype => $say_mimetype }});
$anvil->Database->insert_or_update_files({
file_uuid => $file_uuid,
file_name => $anvil->data->{switches}{file},
file_size => $file_size,
file_md5sum => $file_md5sum,
file_type => $say_mimetype,
});
# Change the file type to "say_mimetype".
$anvil->Database->insert_or_update_files({
debug => 2,
file_uuid => $file_uuid,
file_name => $anvil->data->{switches}{file},
file_size => $file_size,
file_md5sum => $file_md5sum,
file_mtime => $file_mtime,
file_type => $say_mimetype,
});
}
@ -458,6 +528,7 @@ WHERE
file_name => $anvil->data->{switches}{file},
file_size => $file_size,
file_md5sum => $file_md5sum,
file_mtime => $file_mtime,
file_type => "script",
});
}

Loading…
Cancel
Save