* Created the new 'anvil-special-operations' tool, with the first role of reloading (adjusting) a DRBD resource.

* Updated Remote->call() to remove the 'background' parameter as it wasn't working.
* Updated anvil-manage-server-storage to use 'anvil-special-operations' to adjust resources in a way that doesn't block.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 2 years ago
parent 7fbed10864
commit 1b8b0bc493
  1. 1
      Anvil/Tools.pm
  2. 16
      Anvil/Tools/Remote.pm
  3. 1
      man/Makefile.am
  4. 32
      man/anvil-special-operations.8
  5. 6
      share/words.xml
  6. 1
      tools/Makefile.am
  7. 89
      tools/anvil-manage-server-storage
  8. 120
      tools/anvil-special-operations

@ -1150,6 +1150,7 @@ sub _set_paths
'anvil-safe-start' => "/usr/sbin/anvil-safe-start",
'anvil-safe-stop' => "/usr/sbin/anvil-safe-stop",
'anvil-shutdown-server' => "/usr/sbin/anvil-shutdown-server",
'anvil-special-operations' => "/usr/sbin/anvil-special-operations",
'anvil-sync-shared' => "/usr/sbin/anvil-sync-shared",
'anvil-update-files' => "/usr/sbin/anvil-update-files",
'anvil-update-states' => "/usr/sbin/anvil-update-states",

@ -224,13 +224,9 @@ B<NOTE>: By default, a connection to a target will be held open and cached to in
Parameters;
=head3 background (optional, default '0')
If set to C<< 1 >>, the command is run in the background. In this case, the PID of the SSH process is returned. The called should use C<< waitpid >> to ensure the PID has been reaped.
=head3 close (optional, default '0')
If set to C<< 1 >>, the connection to the target will be closed at the end of the call.
If set, the connection to the target will be closed at the end of the call.
=head3 log_level (optional, default C<< 3 >>)
@ -304,7 +300,6 @@ sub call
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "cache::ssh_fh::${ssh_fh_key}" => $anvil->data->{cache}{ssh_fh}{$ssh_fh_key} }});
# Now pick up the rest of the variables.
my $background = defined $parameter->{background} ? $parameter->{background} : 0;
my $close = defined $parameter->{'close'} ? $parameter->{'close'} : 0;
my $no_cache = defined $parameter->{no_cache} ? $parameter->{no_cache} : 0;
my $password = defined $parameter->{password} ? $parameter->{password} : "";
@ -315,7 +310,6 @@ sub call
my $ssh_fh = $anvil->data->{cache}{ssh_fh}{$ssh_fh_key};
# NOTE: The shell call might contain sensitive data, so we show '--' if 'secure' is set and $anvil->Log->secure is not.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
background => $background,
'close' => $close,
password => $anvil->Log->is_secure($password),
secure => $secure,
@ -649,14 +643,6 @@ sub call
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { ssh_fh => $ssh_fh }});
if ($ssh_fh =~ /^Net::OpenSSH/)
{
# Are we doing a background call?
if ($background)
{
my $pid = $ssh_fh->spawn($shell_call);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { pid => $pid }});
return($pid);
}
# The shell_call can't end is a newline. Conveniently, we want the return code. By adding
# this, we ensure it doesn't end in a new-line (and we can't blindly strip off the last
# new-line because of 'EOF' type cat's).

@ -21,6 +21,7 @@ dist_man8_MANS = \
anvil-manage-server.8 \
anvil-manage-server-storage.8 \
anvil-manage-storage-groups.8 \
anvil-special-operations.8 \
anvil-watch-drbd.8 \
scancore.8 \
striker-check-machines.8 \

@ -0,0 +1,32 @@
.\" Manpage for the Anvil! storage groups
.\" Contact mkelly@alteeve.com to report issues, concerns or suggestions.
.TH anvil-special-operations "8" "Jun 30 2023" "Anvil! Intelligent Availability™ Platform"
.SH NAME
anvil-special-operations \- This program is generally meant to be used by other programs.
.SH SYNOPSIS
.B anvil-special-operations
\fI\,<command> \/\fR[\fI\,options\/\fR]
.SH DESCRIPTION
This tool is used, generally by other parts of the Anvil!, to accomplish tasks that generally can't be accomplished by direct system calls. It's a general purpose tool meant to solve specific corner cases.
.TP
.SH OPTIONS
.TP
\-?, \-h, \fB\-\-help\fR
Show this man page.
.TP
\fB\-\-log-secure\fR
When logging, record sensitive data, like passwords.
.TP
\-v, \-vv, \-vvv
Set the log level to 1, 2 or 3 respectively. Be aware that level 3 generates a significant amount of log data.
.SS "Commands:"
.TP
\fB\-\-task\fR
This is the task being requested. Current options are:
.IP refresh-drbd-resource
This requires \fB\-\-resource <resource>\fR, and will call 'drbdadm adjust <resource>' as a background task and then return immediately. This is required when adding a new volume to an existing resource, as 'drbdadm adjust <res>' will hold until it is called on all active DRBD nodes. Without this, the caller would block after the first remote host call.
.IP
.SH AUTHOR
Written by Madison Kelly, Alteeve staff and the Anvil! project contributors.
.SH "REPORTING BUGS"
Report bugs to users@clusterlabs.org

@ -602,6 +602,7 @@ The error was:
</key>
<key name="error_0417">There was a problem with finding a common storage network between: [#!variable!node1_name!#] and: [#!variable!node2_name!#]. Found node 1 to have the IP: [#!variable!node1_ip!#] and node 2: [#!variable!node2_ip!#]. Is there a problem with '/etc/hosts'?</key>
<key name="error_0418">Failed to find a network to use for storage replication. Is there a problem with '/etc/hosts'?</key>
<key name="error_0419"><![CDATA[[ Error ] - The resource to refresh must be provided with '--resource <res>'.]]></key>
<!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which lines are translatable -->
@ -1552,6 +1553,8 @@ Note: This is a permanent action! If you protect this server again later, a full
<key name="job_0462"><![CDATA[ --driver-disc - (optional) A driver disc to be added as a second optical drive. Valid options are above.]]></key>
<key name="job_0463">Enabling the enable-safe-start daemon.</key>
<key name="job_0464">Calling select ScanCore scan agents to ensure the database is updated.</key>
<key name="job_0465">Reload (adjust) a DRBD resource</key>
<key name="job_0466">This job is to reload (adjust) a DRBD resource. It's run as a job as it blocks until the adjust is run on all nodes.</key>
<!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key>
@ -2893,6 +2896,9 @@ Proceed? [y/N]</key>
<key name="message_0308">The DRBD config file was not found. A protect job needs to be run from the Anvil! node hosting the server to be protected.</key>
<key name="message_0309">Waiting a bit to make sure the file: [#!variable!file!#] is done uploading...</key>
<key name="message_0310">Upload complete.</key>
<key name="message_0311">Picked up the special operation job.</key>
<key name="message_0312">Reloading (adjusting) the DRBD resource: [#!variable!resource!#]. This will not complete until all peers have also reloaded this resource.</key>
<key name="message_0313">DRBD resource: [#!variable!resource!#] has been reloaded.</key>
<!-- Translate names (protocols, etc) -->
<key name="name_0001">Normal Password</key> <!-- none in mail-server -->

@ -37,6 +37,7 @@ dist_sbin_SCRIPTS = \
anvil-scan-network \
anvil-show-local-ips \
anvil-shutdown-server \
anvil-special-operations \
anvil-sync-shared \
anvil-test-alerts \
anvil-update-definition \

@ -950,69 +950,52 @@ sub manage_disk_add
}
}
### NOTE: The call to 'drbdadm adjust <res>' hangs, hard, until the same command is run on the peers.
### To deal with this, we register jobs to run 'anvil-special-operations' on the peers, then we
### call adjust here.
# Adjust to start/connect.
my @pids;
foreach my $host_type ("node", "dr")
{
foreach my $short_host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}})
{
my $host_uuid = $anvil->data->{drbd_resource}{$drbd_resource}{host_type}{$host_type}{short_host_name}{$short_host_name}{host_uuid};
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource;
my $shell_call = $anvil->data->{path}{exe}{'anvil-special-operations'}." --task refresh-drbd-resource --resource ".$drbd_resource.$anvil->Log->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:short_host_name' => $short_host_name,
's2:host_uuid' => $host_uuid,
's3:shell_call' => $shell_call,
}});
next if $host_uuid eq $anvil->Get->host_uuid;
### NOTE: The 'adjust' call doesn't return until it's adjusted on all machines, so we
### make these calls as background calls.
# Create the metadata, but don't exit on failure in case the metadata was created in
# a previous pass.
if ($host_uuid eq $anvil->Get->host_uuid)
{
print "- Adjusting the local resource: [".$drbd_resource."] to pick up the new config.\n";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({
shell_call => $shell_call,
background => 1,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
else
{
# We'll use this in a minute to confirm connections.
$anvil->data->{peers}{$short_host_name}{host_uuid} = $host_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"peers::${short_host_name}::host_uuid" => $anvil->data->{peers}{$short_host_name}{host_uuid},
}});
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
debug => 2,
job_command => $shell_call,
job_data => "adjust=".$drbd_resource,
job_name => "server::add_disk::rescan",
job_title => "job_0465",
job_description => "job_0466",
job_progress => 0,
job_host_uuid => $host_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
### NOTE: This is expected to timeout when DR is used.
print "- Adjusting the peer: [".$short_host_name."]'s resource: [".$drbd_resource."] to pick up the new config.\n";
my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network};
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource;
my ($pid) = $anvil->Remote->call({
debug => 2,
background => 1,
shell_call => $shell_call,
target => $use_ip,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
push @pids, $pid;
}
print "- Registered a job with job UUID: [".$job_uuid."] to reload the resource config on the host: [".$short_host_name."].\n";
}
}
# Wait for the remote PID(s) to be reaped.
# foreach my $pid (@pids)
# {
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pid => $pid }});
# next if not $pid;
# waitpid($pid, 0);
# }
print "- Adjusting the local resource: [".$drbd_resource."] to pick up the new config.\n";
print "[ NOTE ] - If this hangs, make sure 'anvil-daemon' is running on the peers.\n";
$shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$drbd_resource;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({
debug => 2,
background => 1,
shell_call => $shell_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
# Find which node is currently Primary and use that host to force primary to start sync. If none,
# force here.
@ -1171,7 +1154,7 @@ sub manage_disk_add
{
my $use_ip = $anvil->data->{peer}{$short_host_name}{access}{ip};
my $use_network = $anvil->data->{peer}{$short_host_name}{access}{network};
print " - The resource is primary onthe peer: [".$short_host_name."], forcing primary there via: [".$use_ip." (".$use_network.")]";
print " - The resource is primary on the peer: [".$short_host_name."], forcing primary there via: [".$use_ip." (".$use_network.")]";
my ($output, $error, $return_code) = $anvil->Remote->call({
shell_call => $shell_call,
target => $use_ip,
@ -1215,13 +1198,11 @@ sub manage_disk_add
print "Initial sync does not appear to be required.\n";
}
# my $startup_needed = 1;
# my $local_role = defined $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} : "";
# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_role => $local_role }});
=cut
my $startup_needed = 1;
my $local_role = defined $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$drbd_resource}{role} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_role => $local_role }});
# Create the DRBD metadata. For this, we don't fail.
foreach my $host_type ("node", "dr")
{

@ -0,0 +1,120 @@
#!/usr/bin/perl
#
# This program has no specific purpose. It's a general program for performing certain special tasks that
# can't be done otherwise in a reliable or efficient way.
#
# Exit codes;
# 0 = Normal exit.
# 1 = No database connection, or the 'refresh-drbd-resource' task was requested without '--resource'.
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Text::Diff;
use Data::Dumper;
# Main body of the program; it reads the switches, connects to the database, marks the job as picked up (if
# run as a job), runs the requested task and then exits.

# Work out this program's file name (the part of '$0' after the last '/') and the directory it was run from
# (the part of '$0' before '/<file name>').
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
# If the directory is relative (starts with '.') and the 'PWD' environment variable is set, replace the
# leading '.' with 'PWD'.
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off buffering on the currently selected output handle so that printed messages are shown immediately.
$| = 1;
my $anvil = Anvil::Tools->new();
# Read the switches. '--task' selects the operation to perform and '--resource' is the DRBD resource that the
# 'refresh-drbd-resource' task acts on.
# NOTE(review): 'man' is presumably the name of the man page shown for '--help' - confirm against
# Get->switches().
$anvil->Get->switches({list => [
"task",
"resource",
], man => $THIS_FILE});
# Log the switches we were given, then log that this program has started.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Connect to the database(s). We can't continue without at least one connection.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases; log (and print) the error, sleep for a bit and then exit with exit code '1'. Note that the
# job's progress is not updated here.
# NOTE(review): Presumably the daemon will pick the job up and try again after we exit - confirm.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0306"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
# If we were run as a job, load the job's details and record that this process (PID) has picked it up, setting
# the progress to '1'.
# NOTE(review): 'job-uuid' is not in the switch list passed to Get->switches() above; presumably it is always
# parsed - confirm.
if ($anvil->data->{switches}{'job-uuid'})
{
$anvil->Job->clear();
$anvil->Job->get_job_details({debug => 2});
$anvil->Job->update_progress({
progress => 1,
job_picked_up_by => $$,
job_picked_up_at => time,
message => "message_0311",
});
}
# Run the requested task. 'refresh-drbd-resource' is the only task handled; any other value falls through to
# the exit below without doing anything.
if ($anvil->data->{switches}{task} eq "refresh-drbd-resource")
{
refresh_drbd_resource($anvil);
}
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# This runs 'drbdadm adjust <res>' on this host for the resource given by '--resource', using a background
# system call, and updates the job's progress before and after the call. It exists so that a remote host can
# ask for the adjust to be run here; this is needed when adding new volumes to an existing resource, as
# 'drbdadm adjust <res>' won't return until the call is run on all hosts.
# 
# Parameters;
# 
# anvil (required) - The Anvil::Tools handle.
# 
# This calls nice_exit() itself; with exit code '1' if no resource was given, and with '0' after the adjust
# call has been made.
sub refresh_drbd_resource
{
	my $anvil = shift;
	
	my $target_resource = $anvil->data->{switches}{resource};
	$anvil->Log->variables({
		source => $THIS_FILE, 
		line   => __LINE__, 
		level  => 2, 
		list   => { resource => $target_resource },
	});
	
	# Without a resource there is nothing to adjust; mark the job as failed, report the error and exit.
	unless ($target_resource)
	{
		$anvil->Job->update_progress({
			progress   => 100,
			message    => "error_0419",
			job_status => "failed",
		});
		$anvil->Log->entry({
			source   => $THIS_FILE, 
			line     => __LINE__, 
			'print'  => 1, 
			level    => 0, 
			priority => 'err', 
			key      => "error_0419",
		});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Record that we're about to adjust the resource.
	my $starting_message = "message_0312,!!resource!${target_resource}!!";
	$anvil->Job->update_progress({
		progress => 10,
		message  => $starting_message,
	});
	$anvil->Log->entry({
		source    => $THIS_FILE, 
		line      => __LINE__, 
		'print'   => 1, 
		level     => 2, 
		key       => "message_0312", 
		variables => { resource => $target_resource },
	});
	
	# Build the adjust command and make the call as a background call.
	my $adjust_call = join(" ", $anvil->data->{path}{exe}{drbdadm}, "adjust", $target_resource);
	$anvil->Log->variables({
		source => $THIS_FILE, 
		line   => __LINE__, 
		level  => 2, 
		list   => { shell_call => $adjust_call },
	});
	my ($call_output, $call_return_code) = $anvil->System->call({
		background => 1,
		shell_call => $adjust_call,
	});
	$anvil->Log->variables({
		source => $THIS_FILE, 
		line   => __LINE__, 
		level  => 2, 
		list   => {
			output      => $call_output,
			return_code => $call_return_code,
		},
	});
	
	# Mark the job as complete and exit.
	my $finished_message = "message_0313,!!resource!${target_resource}!!";
	$anvil->Job->update_progress({
		progress => 100,
		message  => $finished_message,
	});
	$anvil->Log->entry({
		source    => $THIS_FILE, 
		line      => __LINE__, 
		'print'   => 1, 
		level     => 2, 
		key       => "message_0313", 
		variables => { resource => $target_resource },
	});
	
	$anvil->nice_exit({exit_code => 0});
	
	return(0);
}
Loading…
Cancel
Save