anvil/tools/anvil-manage-dr
Digimer 6664c5b77f * Fixed a bug where scan-drbd, with DR configured, was not recording TCP ports assigned to connections properly.
* More bugs fixed in anvil-manage-dr, tested repeatedly as a job and so far, so good. Other functionality still to come.

Signed-off-by: Digimer <digimer@alteeve.ca>
2021-09-12 23:34:25 -04:00

1598 lines
65 KiB
Perl
Executable File

#!/usr/bin/perl
#
# This manages if a server is backed up to a DR host or not. When enabled, it can start or stop replication.
#
# NOTE: Unlike most jobs, this one will directly work on the peer node and the DR host using SSH connections.
# This behaviour is likely to change later as it's not ideal.
#
# Exit codes;
# 0 = Normal exit.
# 1 = Any problem that causes an early exit.
#
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Term::Cap;
use Text::Diff;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
my $anvil = Anvil::Tools->new();
#
$anvil->data->{switches}{'connect'} = ""; # connect an existing DR resource
$anvil->data->{switches}{disconnect} = ""; # disconnect
$anvil->data->{switches}{'job-uuid'} = ""; # Used later
$anvil->data->{switches}{protect} = ""; # Set
$anvil->data->{switches}{protocol} = ""; # "sync", "async" or "long-throw"
$anvil->data->{switches}{remove} = ""; # Set
$anvil->data->{switches}{server} = ""; # Name or UUID
$anvil->data->{switches}{update} = ""; # connects, if needed, and disconnects once UpToDate
$anvil->data->{switches}{Yes} = ""; # Set to avoid confirmation, not case sensitive
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::connect' => $anvil->data->{switches}{'connect'},
'switches::disconnect' => $anvil->data->{switches}{disconnect},
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::protect' => $anvil->data->{switches}{protect},
'switches::protocol' => $anvil->data->{switches}{protocol},
'switches::remove' => $anvil->data->{switches}{remove},
'switches::server' => $anvil->data->{switches}{server},
'switches::update' => $anvil->data->{switches}{update},
'switches::Yes' => $anvil->data->{switches}{Yes},
}});
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0306"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
# If we've got a job UUID, load the job details.
if ($anvil->data->{switches}{'job-uuid'})
{
load_job($anvil);
}
my $terminal = "";
if (not $anvil->data->{switches}{'job-uuid'})
{
my $termios = new POSIX::Termios;
$termios->getattr;
my $ospeed = $termios->getospeed;
$terminal = Tgetent Term::Cap { TERM => undef, OSPEED => $ospeed };
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { terminal => $terminal }});
$terminal->Trequire(qw/ce ku kd/);
print $terminal->Tputs('cl');
}
sanity_check($anvil, $terminal);
do_task($anvil, $terminal);
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
sub do_task
{
my ($anvil, $terminal) = @_;
# What task am I doing?
if ($anvil->data->{switches}{protect})
{
}
return(0);
}
sub sanity_check
{
my ($anvil, $terminal) = @_;
# Begin sanity checks
$anvil->Job->update_progress({
progress => 10,
message => "job_0358",
});
# Are we a node or DR?
my $host_type = $anvil->Get->host_type();
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
host_type => $host_type,
anvil_uuid => $anvil_uuid,
}});
if (($host_type ne "node") or (not $anvil_uuid))
{
# This must be run on a node active in the cluster hosting the server being managed.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0332"});
$anvil->Job->update_progress({
progress => 100,
message => "error_0332",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
# Get the Anvil! details.
$anvil->Database->get_hosts();
$anvil->Database->get_anvils();
$anvil->Database->get_storage_group_data();
# Does this Anvil! have a DR node?
if (not $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid})
{
# This Anvil! does not seem to have a DR host. Exiting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "error_0333"});
$anvil->Job->update_progress({
progress => 100,
message => "error_0333",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
# Can we access DR?
my $password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid};
my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name};
my $dr_ip = $anvil->System->find_matching_ip({
debug => 2,
host => $dr1_host_name,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
password => $anvil->Log->is_secure($password),
dr1_host_uuid => $dr1_host_uuid,
dr1_host_name => $dr1_host_name,
dr_ip => $dr_ip,
}});
if ((not $dr_ip) or ($dr_ip eq "!!error!!"))
{
# Failed to find an IP we can access the DR host. Has it been configured? Is it running? Exiting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "error_0334", variables => { host_name => $dr1_host_name }});
$anvil->Job->update_progress({
progress => 0,
message => "error_0334,!!host_name!".$dr1_host_name."!!",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
# Test access.
my $access = $anvil->Remote->test_access({
target => $dr_ip,
password => $password,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
if (not $access)
{
# Failed to access the DR host. Is it running? Exiting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0335", variables => {
host_name => $dr1_host_name,
ip_address => $dr_ip,
}});
$anvil->Job->update_progress({
progress => 0,
message => "error_0335,!!host_name!".$dr1_host_name."!!,!!ip_address!".$dr_ip."!!",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
# Can we parse the CIB?
my ($problem) = $anvil->Cluster->parse_cib();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if ($problem)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0336"});
$anvil->Job->update_progress({
progress => 0,
message => "error_0336",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
$anvil->nice_exit({exit_code => 1});
}
# Both nodes need to be in the cluster, are they?
if (not $anvil->data->{cib}{parsed}{'local'}{ready})
{
# We're not a full member of the cluster yet. Please try again once we're fully in. Exiting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0337"});
$anvil->Job->update_progress({
progress => 0,
message => "error_0337",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
### TODO: We can queue a job to update the peer later, there's no real need, in the long run, for the
### peer to be online.
# If we're protecting or removing a server from DR, the peer needs to be up.
if ((($anvil->data->{switches}{protect}) or
($anvil->data->{switches}{remove}) or
($anvil->data->{switches}{protocol})) &&
(not $anvil->data->{cib}{parsed}{peer}{ready}))
{
if ($anvil->data->{switches}{protect})
{
# We can't setup a server to be protected unless both nodes are up, and the peer
# isn't at this time.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0338"});
$anvil->Job->update_progress({
progress => 0,
message => "error_0338",
job_status => "failed",
});
}
else
{
# We can't remove a server from DR unless both nodes are up, and the peer isn't at
# this time.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0339"});
$anvil->Job->update_progress({
progress => 0,
message => "error_0339",
job_status => "failed",
});
}
$anvil->nice_exit({exit_code => 1});
}
# Verify we found the server.
$anvil->data->{server}{'server-name'} = "";
$anvil->data->{server}{'server-uuid'} = "";
$anvil->data->{server}{'anvil-uuid'} = $anvil_uuid;
if (not $anvil->data->{switches}{server})
{
# Please specify the server to manager using '--server <name or uuid>'.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0340"});
$anvil->Job->update_progress({
progress => 100,
message => "error_0340",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
else
{
my $server = $anvil->data->{switches}{server};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server => $server }});
$anvil->Database->get_servers();
if (exists $anvil->data->{servers}{server_uuid}{$server})
{
$anvil->data->{server}{'server-uuid'} = $server;
$anvil->data->{server}{'server-name'} = $anvil->data->{servers}{server_uuid}{$server}{server_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'server::server-uuid' => $anvil->data->{server}{'server-uuid'},
'server::server-name' => $anvil->data->{server}{'server-name'},
}});
}
if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server})
{
$anvil->data->{server}{'server-name'} = $server;
$anvil->data->{server}{'server-uuid'} = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server}{server_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'server::server-uuid' => $anvil->data->{server}{'server-uuid'},
'server::server-name' => $anvil->data->{server}{'server-name'},
}});
}
}
# Get and parse the server's definition to find the DRBD devices.
if ((not $anvil->data->{server}{'server-uuid'}) or (not $anvil->data->{server}{'server-name'}))
{
# Failed to find the server by name or UUID.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0341", variables => { server => $anvil->data->{switches}{server} }});
$anvil->Job->update_progress({
progress => 100,
message => "error_0341,!!server!".$anvil->data->{switches}{server}."!!",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
if (not $anvil->data->{switches}{protocol})
{
$anvil->data->{switches}{protocol} = "async";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::protocol' => $anvil->data->{switches}{protocol},
}});
}
elsif (($anvil->data->{switches}{protocol} ne "sync") &&
($anvil->data->{switches}{protocol} ne "async") &&
($anvil->data->{switches}{protocol} ne "long-throw"))
{
# The protocol is invalid. Please use '--help' for more information.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0342", variables => { protocol => $anvil->data->{switches}{protocol} }});
$anvil->Job->update_progress({
progress => 100,
message => "error_0341,!!protocol!".$anvil->data->{switches}{protocol}."!!",
job_status => "failed",
});
$anvil->nice_exit({exit_code => 1});
}
# Are we being asked to actuall do something?
if (((not $anvil->data->{switches}{'connect'}) &&
(not $anvil->data->{switches}{disconnect}) &&
(not $anvil->data->{switches}{protect}) &&
(not $anvil->data->{switches}{remove}) &&
(not $anvil->data->{switches}{update})) or
($anvil->data->{switches}{help}) or
($anvil->data->{switches}{h}))
{
print "
What do you want to do?
Options (all require --server <name or UUID>);
--connect
Connect a server already on DR to it's DR copy, update the data there if needed and begin streaming
replication.
--disconnect
Disconnect a server from the DR image. This will end streaming replication.
--protect
The sets up the server to be imaged on DR, if it isn't already protected.
Notes: If the server is not running, the DRBD resource volume(s) will be brought up. Both nodes need
to be online and in the cluster.
--protocol <sync,async,long-throw>, default 'async'
This allows the protocol used to replicate data to the DR host to be configured. By default, 'async'
is used.
Modes:
async (default)
This tells the storage layer to consider the write to be completed once the data is on the
active node's network transmit buffer. In this way, the DR host is allowed to fall behind a
small amount, but the active nodes will not slow down because of higher network transit times
to the DR location.
NOTE: The transmit (TX) buffer size can be checked / updated with 'ethtool -g <link_device>'.
If the transmit buffer fills, storage will hold until the buffer flushes, causing
periodic storage IO waits. You can increase the buffer size to a certain degree with
'ethtool -G <link_device> tx <size>' (set on all storage network link devices on both
nodes. For more information, see:
https://www.linuxjournal.com/content/queueing-linux-network-stack
or
https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/monitoring-and-tuning-the-rx-ring-buffer_configuring-and-managing-networking
If you set the maximum transmit buffer size and still run into IO waits, consider
'long-throw'.
sync
This tells the storage layer to consider the write complete when the data has reached the DR
host's storage (when the data is committed to disk on DR). This means that the DR host will
never fall behind. However, if the DR's network latency is higher or the bandwidth to the DR
is lower than that of the latency/bandwidth between the nodes, then total storage performance
will be reduced to DR network speeds while DR is connected.
This should be tested before implemented in production.
long-throw
This is an option that requires an additional license fee to use.
This option (based on LINBIT's DRBD Proxy) and is designed for DR hosts that are connected
over a wide-area network (or other cases where the connection to the DR is high-latency, low
bandwidth or intermittently interrupted). It uses RAM on the host to act, effectively, as a
very large transmit buffer. This requires allocating host RAM to the task, and so could
reduces the available RAM assignable to assign to servers.
In this mode, the DR host is allowed to fall further behind production, but it significantly
reduces (hopefully eliminates) how often node replication waits because of a full transmit
buffer.
The default size is 16 MiB, with a maximum size of 16 GiB. When the size is set to over
1 GiB, the size allocated to this buffer is accounted for when calculating available RAM that
can be assigned to hosted servers.
--remove
This removes the DR image from the DR host for the server, freeing up space on DR but removing the
protection afforded by DR.
--update
This tells the DR to be connected and sync, Once the volume(s) on DR are 'UpToDate', the connection
is closed. This provides a point in time update of the server's image on DR.
--Yes
Note the capital 'Y'. This can be set to proceed without confirmation. Use carefully with '--protect'
and '--remove'! If the '--job-uuid' is set, this is assumed and no prompt will be presented.
Exiting.
";
if (($anvil->data->{switches}{help}) or ($anvil->data->{switches}{h}))
{
$anvil->nice_exit({exit_code => 0});
}
else
{
$anvil->nice_exit({exit_code => 1});
}
}
# Sanity checks complete!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0359"});
$anvil->Job->update_progress({
progress => 20,
message => "job_0359",
});
# If we're protecting, make sure there's enough space on the DR host.
if ($anvil->data->{switches}{protect})
{
process_protect($anvil, $terminal);
}
return(0);
}
sub process_protect
{
my ($anvil, $terminal) = @_;
# Parse out the DRBD resource's backing the server and get their LV sizes.
$anvil->Database->get_server_definitions();
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
my $anvil_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name};
my $node1_short_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{short_host_name};
my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name};
my $node2_short_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{short_host_name};
my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid};
my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name};
my $dr1_short_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{short_host_name};
my $server_name = $anvil->data->{server}{'server-name'};
my $server_uuid = $anvil->data->{server}{'server-uuid'};
my $short_host_name = $anvil->Get->short_host_name();
my $server_definition_xml = $anvil->data->{server_definitions}{server_definition_server_uuid}{$server_uuid}{server_definition_xml};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
anvil_uuid => $anvil_uuid,
anvil_password => $anvil->Log->is_secure($anvil_password),
node1_host_uuid => $node1_host_uuid,
node1_host_name => $node1_host_name,
node1_short_host_name => $node1_short_host_name,
node2_host_uuid => $node2_host_uuid,
node2_host_name => $node2_host_name,
node2_short_host_name => $node2_short_host_name,
dr1_host_uuid => $dr1_host_uuid,
dr1_host_name => $dr1_host_name,
dr1_short_host_name => $dr1_short_host_name,
server_name => $server_name,
server_uuid => $server_uuid,
server_definition_xml => $server_definition_xml,
short_host_name => $short_host_name,
}});
# Sanity checks complete!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0360", variables => { server => $anvil->data->{server}{'server-name'} }});
$anvil->Job->update_progress({
progress => 30,
message => "job_0360,!!server!".$anvil->data->{server}{'server-name'}."!!",
});
$anvil->Server->parse_definition({
debug => 2,
host => $short_host_name,
server => $anvil->data->{server}{'server-name'},
source => "from_db",
definition => $server_definition_xml,
});
$anvil->DRBD->gather_data({debug => 2});
my $server_ram = $anvil->data->{server}{$short_host_name}{$server_name}{'from_db'}{memory};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
server_ram => $anvil->Convert->add_commas({number => $server_ram})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $server_ram}).")",
}});
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$short_host_name}{$server_name}{drbd}{resource}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
foreach my $this_host_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}})
{
my $this_host_uuid = $anvil->Get->host_uuid_from_name({host_name => $this_host_name});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
this_host_name => $this_host_name,
this_host_uuid => $this_host_uuid,
}});
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}})
{
# Always get the LV sizes, as that factors metadata. DRBD size is
# minus metadata, and 0 when down.
my $device_path = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_path};
my $backing_disk = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{backing_disk};
my $device_minor = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_minor};
my $tcp_port = $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:volume" => $volume,
"s2:device_path" => $device_path,
"s3:backing_disk" => $backing_disk,
"s4:device_minor" => $device_minor,
}});
my $this_size = $anvil->Storage->get_size_of_block_device({debug => 2, host_uuid => $this_host_uuid, path => $backing_disk});
if ($this_size eq "")
{
# DRBD config file was updated, but LV not created yet.
next;
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
this_size => $anvil->Convert->add_commas({number => $this_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $this_size}).")",
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
this_size => $anvil->Convert->add_commas({number => $this_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $this_size}).")",
}});
if ((not exists $anvil->data->{server}{drbd}{$resource}{$volume}{size}) or (not $anvil->data->{server}{drbd}{$resource}{$volume}{size}))
{
$anvil->data->{server}{drbd}{$resource}{$volume}{size} = $this_size;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::drbd::${resource}::${volume}::size" => $anvil->data->{server}{drbd}{$resource}{$volume}{size},
}});
}
if (not exists $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid})
{
$anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid} = "";
}
### NOTE: This check make sense only under the assumption that the DRBD minor
### is common across both nodes. This should be the case, but doesn't
### strictly have to be so.
if ((not exists $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number}) or
(not defined $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number}) or
($anvil->data->{server}{drbd}{$resource}{$volume}{minor_number} eq ""))
{
$anvil->data->{server}{drbd}{$resource}{$volume}{minor_number} = $device_minor;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::drbd::${resource}::${volume}::minor_number" => $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number},
}});
}
if ((not exists $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port}) or
(not defined $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port}) or
($anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port} eq ""))
{
$anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port} = $tcp_port;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::drbd::${resource}::${volume}::tcp_port" => $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port},
}});
}
# What storage group does this belong to?
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::drbd::${resource}::${volume}::storage_group_uuid" => $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid},
}});
if (not $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid})
{
my $storage_key = $resource."/".$volume;
my $storage_group_uuid = $anvil->Storage->get_storage_group_from_path({
debug => 2,
anvil_uuid => $anvil_uuid,
path => $backing_disk,
});
my $storage_group_name = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{group_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
storage_key => $storage_key,
storage_group_uuid => $storage_group_uuid,
storage_group_name => $storage_group_name,
}});
# We'll need to sum up the volumes on each storage group, as
# it's possible the volumes are on different SGs.
$anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid} = $storage_group_uuid;
$anvil->data->{server}{storage_groups}{$storage_group_name}{used_by}{$storage_key} = 1;
$anvil->data->{server}{storage_groups}{$storage_group_name}{storage_group_uuid} = $storage_group_uuid;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::drbd::${resource}::${volume}::storage_group_uuid" => $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid},
"server::storage_groups::${storage_group_name}::used_by::${storage_key}" => $anvil->data->{server}{storage_groups}{$storage_group_name}{used_by}{$storage_key},
"server::storage_groups::${storage_group_name}::storage_group_uuid" => $anvil->data->{server}{storage_groups}{$storage_group_name}{storage_group_uuid},
}});
}
if ($this_size > $anvil->data->{server}{drbd}{$resource}{$volume}{size})
{
$anvil->data->{server}{drbd}{$resource}{$volume}{size} = $this_size;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::drbd::${resource}::${volume}::size" => $anvil->Convert->add_commas({number => $anvil->data->{server}{drbd}{$resource}{$volume}{size}})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $anvil->data->{server}{drbd}{$resource}{$volume}{size}}).")",
}});
}
}
}
}
# Make sure there is enough space on DR for the volumes under this VM.
my $problem = 0;
my $config_file = "";
foreach my $storage_group_name (sort {$a cmp $b} keys %{$anvil->data->{server}{storage_groups}})
{
my $storage_group_uuid = $anvil->data->{server}{storage_groups}{$storage_group_name}{storage_group_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
storage_group_name => $storage_group_name,
storage_group_uuid => $storage_group_uuid,
}});
# First, is this SG on DR?
if (not exists $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$dr1_host_uuid})
{
# The DR host doesn't appear to be storage group.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0343", variables => {
host_name => $dr1_host_name,
storage_group => $$storage_group_name,
}});
$anvil->Job->update_progress({
progress => 100,
message => "error_0343,!!host_name!".$dr1_host_name."!!,!!storage_group!".$$storage_group_name."!!",
});
$problem = 1;
}
my $space_needed = 0;
foreach my $resource_key (sort {$a cmp $b} keys %{$anvil->data->{server}{storage_groups}{$storage_group_name}{used_by}})
{
my ($resource, $volume) = ($resource_key =~ /^(.*)\/(\d+)$/);
my $volume_size = $anvil->data->{server}{drbd}{$resource}{$volume}{size};
$space_needed += $volume_size,
$config_file = $anvil->data->{new}{resource}{$resource}{config_file};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
config_file => $config_file,
resource_key => $resource_key,
resource => $resource,
volume => $volume,
volume_size => $anvil->Convert->add_commas({number => $volume_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $volume_size}).")",
space_needed => $anvil->Convert->add_commas({number => $space_needed})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $space_needed}).")",
}});
}
# Is there enough space on DR?
my $space_on_dr = $anvil->data->{storage_groups}{anvil_uuid}{$anvil_uuid}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$dr1_host_uuid}{vg_free};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
space_on_dr => $anvil->Convert->add_commas({number => $space_on_dr})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $space_on_dr}).")",
space_needed => $anvil->Convert->add_commas({number => $space_needed})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $space_needed}).")",
}});
if ($space_needed > $space_on_dr)
{
my $variables = {
space_needed => $anvil->Convert->bytes_to_human_readable({'bytes' => $space_needed}),
space_needed_bytes => $anvil->Convert->add_commas({number => $space_needed}),
storage_group => $storage_group_name,
space_on_dr => $anvil->Convert->bytes_to_human_readable({'bytes' => $space_on_dr}),
space_on_dr_bytes => $anvil->Convert->add_commas({number => $space_on_dr}),
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0344", variables => $variables});
$anvil->Job->update_progress({
progress => 100,
message => "error_0344",
variables => $variables
});
$problem = 1;
}
}
if ($problem)
{
$anvil->nice_exit({exit_code => 1});
}
# Get net next pair of TCP ports.
my (undef, $tcp_ports) = $anvil->DRBD->get_next_resource({
debug => 2,
dr_tcp_ports => 1,
});
my ($node1_to_dr_port, $node2_to_dr_port) = split/,/, $tcp_ports;
# Show what we're doing
my $variables = {
protocol => $anvil->data->{switches}{protocol},
node1_to_dr_port => $node1_to_dr_port,
node2_to_dr_port => $node2_to_dr_port,
config_file => $config_file,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0361", variables => $variables});
$anvil->Job->update_progress({
progress => 40,
message => "job_0361",
variables => $variables,
});
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}})
{
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$resource}})
{
my $variables = {
resource => $resource,
volume => $volume,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0362", variables => $variables});
$anvil->Job->update_progress({
progress => 43,
message => "job_0362",
variables => $variables,
});
my $lv_size = $anvil->data->{server}{drbd}{$resource}{$volume}{size};
my $storage_group_uuid = $anvil->data->{server}{drbd}{$resource}{$volume}{storage_group_uuid};
my $dr_lv_name = $resource."_".$volume;
my $dr_vg_name = $anvil->Storage->get_vg_name({
debug => 3,
storage_group_uuid => $storage_group_uuid,
host_uuid => $dr1_host_uuid,
});
my $dr_lv_path = "/dev/".$dr_vg_name."/".$dr_lv_name;
my $extent_size = $anvil->data->{storage_groups}{storage_group_uuid}{$storage_group_uuid}{host_uuid}{$dr1_host_uuid}{vg_extent_size};
my $extent_count = int($lv_size / $extent_size);
my $shell_call = $anvil->data->{path}{exe}{lvcreate}." -l ".$extent_count." -n ".$dr_lv_name." ".$dr_vg_name." -y";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s01:resource" => $resource,
"s02:volume" => $volume,
"s03:lv_size" => $anvil->Convert->add_commas({number => $lv_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size}).")", ,
"s04:storage_group_uuid" => $storage_group_uuid,
"s05:dr_lv_name" => $dr_lv_name,
"s06:dr_vg_name" => $dr_vg_name,
"s07:dr_lv_path" => $dr_lv_path,
"s08:extent_size" => $anvil->Convert->add_commas({number => $extent_size})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $extent_size}).")",
"s09:extent_count" => $extent_count,
"s10:shell_call" => $shell_call,
}});
$anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lvcreate_call} = $shell_call;
$anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lv_path} = $dr_lv_path;
$anvil->data->{server}{dr}{volumes}{$resource}{$volume}{storage_group_uuid} = $storage_group_uuid;
$anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_tcp_port} = $anvil->data->{server}{drbd}{$resource}{$volume}{tcp_port};
$anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_minor} = $anvil->data->{server}{drbd}{$resource}{$volume}{minor_number};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::dr::volumes::${resource}::${volume}::lvcreate_call" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lvcreate_call},
"server::dr::volumes::${resource}::${volume}::lv_path" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lv_path},
"server::dr::volumes::${resource}::${volume}::storage_group_uuid" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{storage_group_uuid},
"server::dr::volumes::${resource}::${volume}::drbd_tcp_port" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_tcp_port},
"server::dr::volumes::${resource}::${volume}::drbd_minor" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_minor},
}});
# Get the VG name that this volume will be created on.
$variables = {
lv_path => $dr_lv_path,
lv_size => $anvil->Convert->bytes_to_human_readable({'bytes' => $lv_size}),
lv_size_bytes => $anvil->Convert->add_commas({number => $lv_size}),
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0363", variables => $variables});
$anvil->Job->update_progress({
progress => 47,
message => "job_0363",
variables => $variables,
});
}
}
### NOTE: 'Yes' is set when a job is picked up, so this won't re-register the job.
my $record_job = 0;
if (not $anvil->data->{switches}{Yes})
{
# Ask the user to confirm.
print "\n".$anvil->Words->string({key => "message_0021"})."\n";
my $answer = <STDIN>;
chomp $answer;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }});
if ($answer =~ /^y/i)
{
print $anvil->Words->string({key => "message_0175"})."\n";
$record_job = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }});
}
else
{
print $anvil->Words->string({key => "message_0022"})."\n";
$anvil->nice_exit({exit_code => 0});
}
}
elsif (not $anvil->data->{switches}{'job-uuid'})
{
$record_job = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }});
}
if ($record_job)
{
my $job_data = "server=".$anvil->data->{switches}{server}."\n";
$job_data .= "protect=1\n";
$job_data .= "protocol=".$anvil->data->{switches}{protocol}."\n";
# Register the job with this host
my ($job_uuid) = $anvil->Database->insert_or_update_jobs({
debug => 2,
job_command => $anvil->data->{path}{exe}{'anvil-manage-dr'}.$anvil->Log->switches,
job_data => $job_data,
job_name => "server::dr",
job_title => "job_0356",
job_description => "job_0357",
job_progress => 0,
job_host_uuid => $anvil->Get->host_uuid,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }});
# Report the job UUID.
print $anvil->Words->string({key => "job_0383", variables => { job_uuid => $job_uuid }})."\n";
$anvil->nice_exit({exit_code => 0});
}
### If we're still here, time to get started.
# Read in the old config and update it.
my $old_resource_config = $anvil->Storage->read_file({file => $config_file});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { old_resource_config => $old_resource_config }});
# Pull the data out of the old file
my $hosts = "";
my $nodes_tcp_port = "";
foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
my $dr1_seen = 0;
foreach my $this_host_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { this_host_name => $this_host_name }});
my $node_id = "";
if (($this_host_name eq $node1_short_host_name) or ($this_host_name eq $node1_host_name))
{
$node_id = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node_id => $node_id }});
if ((not $nodes_tcp_port) &&
(exists $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}) &&
($anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port}))
{
$nodes_tcp_port = $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { nodes_tcp_port => $nodes_tcp_port }});
}
}
elsif (($this_host_name eq $node2_short_host_name) or ($this_host_name eq $node2_host_name))
{
$node_id = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node_id => $node_id }});
if ((not $nodes_tcp_port) &&
(exists $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}) &&
($anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port}))
{
$nodes_tcp_port = $anvil->data->{new}{resource}{$resource}{peer}{$this_host_name}{tcp_port};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { nodes_tcp_port => $nodes_tcp_port }});
}
}
elsif (($this_host_name eq $dr1_short_host_name) or ($this_host_name eq $dr1_host_name))
{
$node_id = 2;
$dr1_seen = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
node_id => $node_id,
dr1_seen => $dr1_seen,
}});
}
my $volumes = "";
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$resource}})
{
my $device_path = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_path};
my $backing_disk = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{backing_disk};
my $device_minor = $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_minor};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:device_path" => $device_path,
"s2:backing_disk" => $backing_disk,
"s3:device_minor" => $device_minor,
}});
$volumes .= $anvil->Words->string({key => "file_0004", variables => {
volume => $volume,
drbd_path => $device_path,
minor => $device_minor,
lv_path => $backing_disk,
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volumes => $volumes }});
# Record the DRBD device for adding DR.
if (not exists $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path})
{
$anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path} = $device_path;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server::dr::volumes::${resource}::${volume}::device_path" => $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path},
}});
}
}
$hosts .= $anvil->Words->string({key => "file_0003", variables => {
short_host_name => $this_host_name,
node_id => $node_id,
volumes => $volumes,
}});
}
if (not $dr1_seen)
{
# Inject the DR.
my $volumes = "";
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{dr}{volumes}{$resource}})
{
my $device_path = $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{device_path};
my $backing_disk = $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{lv_path};
my $device_minor = $anvil->data->{server}{dr}{volumes}{$resource}{$volume}{drbd_minor};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"s1:device_path" => $device_path,
"s2:backing_disk" => $backing_disk,
"s3:device_minor" => $device_minor,
}});
$volumes .= $anvil->Words->string({key => "file_0004", variables => {
volume => $volume,
drbd_path => $device_path,
minor => $device_minor,
lv_path => $backing_disk,
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volumes => $volumes }});
}
$hosts .= $anvil->Words->string({key => "file_0003", variables => {
short_host_name => $dr1_short_host_name,
node_id => "2",
volumes => $volumes,
}});
}
}
### The connections.
$anvil->Database->get_ip_addresses({debug => 2});
my $node1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{network}{sn1}{ip_address};
my $node2_sn1_ip = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{network}{sn1}{ip_address};
my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address};
my $dr_protocol = "A";
if ($anvil->data->{switches}{protocol} eq "sync")
{
$dr_protocol = "C";
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
node1_sn1_ip => $node1_sn1_ip,
node2_sn1_ip => $node2_sn1_ip,
dr1_sn1_ip => $dr1_sn1_ip,
dr_protocol => $dr_protocol,
}});
# Node 1 to Node 2 first, then n
my $connections = $anvil->Words->string({key => "file_0005", variables => {
host1_short_name => $node1_short_host_name,
host1_sn_ip => $node1_sn1_ip,
host2_short_name => $node2_short_host_name,
host2_sn_ip => $node2_sn1_ip,
tcp_port => $nodes_tcp_port,
'c-rate-maximum' => 500,
protocol => "C",
fencing => "resource-and-stonith"
}});
# Node 1 to DR
$connections .= $anvil->Words->string({key => "file_0005", variables => {
host1_short_name => $node1_short_host_name,
host1_sn_ip => $node1_sn1_ip,
host2_short_name => $dr1_short_host_name,
host2_sn_ip => $dr1_sn1_ip,
tcp_port => $node1_to_dr_port,
'c-rate-maximum' => 500,
protocol => $dr_protocol,
fencing => "dont-care"
}});
# Node 2 to DR
$connections .= $anvil->Words->string({key => "file_0005", variables => {
host1_short_name => $node2_short_host_name,
host1_sn_ip => $node2_sn1_ip,
host2_short_name => $dr1_short_host_name,
host2_sn_ip => $dr1_sn1_ip,
tcp_port => $node2_to_dr_port,
'c-rate-maximum' => 500,
protocol => $dr_protocol,
fencing => "dont-care"
}});
my $new_resource_config = $anvil->Words->string({key => "file_0006", variables => {
server => $server_name,
hosts => $hosts,
connections => $connections,
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { new_resource_config => $new_resource_config }});
# Is the new res file the same as the old one?
my $difference = diff \$old_resource_config, \$new_resource_config, { STYLE => 'Unified' };
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { difference => $difference }});
if (not $difference)
{
# The resource file doesn't need to be updated.
my $variables = {
file => $config_file,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0364", variables => $variables});
$anvil->Job->update_progress({
progress => 50,
message => "job_0364",
variables => $variables,
});
}
else
{
# Write out a test file.
my $test_file = $anvil->data->{path}{directories}{temp}."/test-".$server_name.".res";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { test_file => $test_file }});
my ($problem) = $anvil->Storage->write_file({
debug => 2,
backup => 0,
overwrite => 1,
file => $test_file,
body => $new_resource_config,
user => "root",
group => "root",
mode => "0644",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
# Validate.
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." --config-to-test ".$test_file." --config-to-exclude ".$config_file." sh-nop";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
if ($return_code)
{
# Something went wrong.
my $variables = {
return_code => $return_code,
output => $output,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0345", variables => $variables});
$anvil->Job->update_progress({
progress => 100,
message => "error_0345",
variables => $variables,
});
$anvil->nice_exit({exit_code => 1});
}
# Remove the test file.
unlink $test_file;
# Backup the res file so we can tell the user where the current config was backed up to in
# case they need to restore it.
my ($backup_file) = $anvil->Storage->backup({file => $config_file});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_file => $backup_file }});
my $variables = { backup_file => $backup_file };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0365", variables => $variables});
$anvil->Job->update_progress({
progress => 60,
message => "job_0365",
variables => $variables,
});
# Write out the new file.
($problem) = $anvil->Storage->write_file({
debug => 2,
backup => 0,
overwrite => 1,
file => $config_file,
body => $new_resource_config,
user => "root",
group => "root",
mode => "0644",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_file => $backup_file }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0366"});
$anvil->Job->update_progress({
progress => 65,
message => "job_0366",
});
# Call 'drbdadm dump-xml' to check that it's OK.
($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{drbdadm}." dump-xml"});
if ($return_code)
{
# Something went wrong.
my $variables = {
return_code => $return_code,
output => $output,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0345", variables => $variables});
$anvil->Job->update_progress({
progress => 70,
message => "error_0345",
variables => $variables,
});
# Restoring the old config now.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0346"});
$anvil->Job->update_progress({
progress => 75,
message => "error_0346",
});
# Backup the bad file and worn the user.
my ($backup_file) = $anvil->Storage->backup({file => $config_file});
$variables = { file => $backup_file };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0347", variables => $variables});
$anvil->Job->update_progress({
progress => 80,
message => "error_0347",
variables => $variables,
});
# Write out the new file.
my ($problem) = $anvil->Storage->write_file({
debug => 2,
backup => 1,
overwrite => 1,
file => $config_file,
body => $old_resource_config,
user => "root",
group => "root",
mode => "0644",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { backup_file => $backup_file }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0348"});
$anvil->Job->update_progress({
progress => 100,
message => "error_0348",
variables => $variables,
});
$anvil->nice_exit({exit_code => 1});
}
# New config is good! Update the file on the peers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0367"});
$anvil->Job->update_progress({
progress => 70,
message => "job_0367",
});
}
# New config is good! Update the file on the peers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0368"});
$anvil->Job->update_progress({
progress => 72,
message => "job_0368",
});
foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid)
{
# "Peer" in this context is either a node or a DR host
next if $this_host_uuid eq $anvil->Get->host_uuid();
my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name};
my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address};
my $variables = {
file => $config_file,
host_name => $peer_host_name,
ip_address => $peer_sn_ip,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0369", variables => $variables});
$anvil->Job->update_progress({
progress => 75,
message => "job_0369",
variables => $variables,
});
my ($problem) = $anvil->Storage->write_file({
debug => 2,
backup => 1,
overwrite => 1,
file => $config_file,
body => $new_resource_config,
user => "root",
group => "root",
mode => "0644",
target => $peer_sn_ip,
password => $anvil_password,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
}
# Immediately call scan-drbd on all machines to ensure that if another run is about to happen for a
# different server, it knows the used ports list is updated.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0381"});
$anvil->Job->update_progress({
progress => 76,
message => "job_0381",
});
my $scan_drbd_call = $anvil->data->{path}{directories}{scan_agents}."/scan-drbd/scan-drbd --force ".$anvil->Log->switches();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { scan_drbd_call => $scan_drbd_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $scan_drbd_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid)
{
# "Peer" in this context is either a node or a DR host
next if $this_host_uuid eq $anvil->Get->host_uuid();
my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name};
my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address};
my $variables = { host_name => $peer_host_name };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0382", variables => $variables});
$anvil->Job->update_progress({
progress => 77,
message => "job_0382",
variables => $variables,
});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_sn_ip,
password => $anvil_password,
shell_call => $scan_drbd_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
}
# Create the LV and MD on DR.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0370"});
$anvil->Job->update_progress({
progress => 78,
message => "job_0370",
});
my $create_md = 0;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{server}{drbd}{$server_name}})
{
my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address};
my $lv_path = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lv_path};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
volume => $volume,
dr1_sn1_ip => $dr1_sn1_ip,
lv_path => $lv_path,
}});
my $variables = {
volume => $volume,
lv_path => $lv_path,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0371", variables => $variables});
$anvil->Job->update_progress({
progress => 80,
message => "job_0371",
variables => $variables,
});
my $lv_check_call = "
if [ -e '".$lv_path."' ];
then
echo exists;
else
echo create;
fi";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lv_check_call => $lv_check_call }});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $dr1_sn1_ip,
password => $anvil_password,
shell_call => $lv_check_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
if ($output eq "exists")
{
my $variables = { lv_path => $lv_path };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0372", variables => $variables});
$anvil->Job->update_progress({
progress => 80,
message => "job_0372",
variables => $variables,
});
next;
}
# Create the LV.
my $lvcreate_call = $anvil->data->{server}{dr}{volumes}{$server_name}{$volume}{lvcreate_call};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { lvcreate_call => $lvcreate_call }});
($output, $error, $return_code) = $anvil->Remote->call({
target => $dr1_sn1_ip,
password => $anvil_password,
shell_call => $lvcreate_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
sleep 1;
# Does it exist now?
($output, $error, $return_code) = $anvil->Remote->call({
target => $dr1_sn1_ip,
password => $anvil_password,
shell_call => $lv_check_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
if ($output eq "create")
{
my $variables = { lv_path => $lv_path };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0349", variables => $variables});
$anvil->Job->update_progress({
progress => 100,
message => "error_0349",
variables => $variables,
});
$anvil->nice_exit({exit_code => 1});
}
# Create the DRBD meta data now.
$create_md = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { create_md => $create_md }});
}
if ($create_md)
{
my $dr1_sn1_ip = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{network}{sn1}{ip_address};
my $drbd_md_call = $anvil->data->{path}{exe}{drbdadm}." --force create-md --max-peers=3 ".$server_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
dr1_sn1_ip => $dr1_sn1_ip,
drbd_md_call => $drbd_md_call,
}});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $dr1_sn1_ip,
password => $anvil_password,
shell_call => $drbd_md_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
}
# Reload the config.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0373"});
$anvil->Job->update_progress({
progress => 85,
message => "job_0373",
});
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." adjust ".$server_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid)
{
# "Peer" in this context is either a node or a DR host
next if $this_host_uuid eq $anvil->Get->host_uuid();
my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name};
my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address};
my $variables = {
server => $server_name,
host_name => $peer_host_name,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0374", variables => $variables});
$anvil->Job->update_progress({
progress => 85,
message => "job_0374",
variables => $variables,
});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_sn_ip,
password => $anvil_password,
shell_call => $shell_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
}
# If the resource is down, bring it up.
$variables = { server => $server_name };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0375", variables => $variables});
$anvil->Job->update_progress({
progress => 88,
message => "job_0375",
variables => $variables,
});
my $drbd_up_call = $anvil->data->{path}{exe}{drbdsetup}." status ".$server_name." || ".$anvil->data->{path}{exe}{drbdadm}." up ".$server_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_up_call => $drbd_up_call }});
($output, $return_code) = $anvil->System->call({shell_call => $drbd_up_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid)
{
# "Peer" in this context is either a node or a DR host
next if $this_host_uuid eq $anvil->Get->host_uuid();
my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name};
my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address};
$variables = { host_name => $peer_host_name };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0376", variables => $variables});
$anvil->Job->update_progress({
progress => 90,
message => "job_0376",
variables => $variables,
});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_sn_ip,
password => $anvil_password,
shell_call => $drbd_up_call,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
error => $error,
output => $output,
return_code => $return_code,
}});
}
# Now watch until the DR host shows up
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0377"});
$anvil->Job->update_progress({
progress => 92,
message => "job_0377",
});
my $waiting = 1;
while($waiting)
{
sleep 5;
$anvil->DRBD->gather_data({debug => 2});
my $dr_seen = 1;
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{volume}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volume => $volume }});
if (exists $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name})
{
my $local_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{local_role};
my $local_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{local_disk_state};
my $peer_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{peer_role};
my $peer_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$dr1_short_host_name}{peer_disk_state};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_role => $local_role,
local_disk_state => $local_disk_state,
peer_role => $peer_role,
peer_disk_state => $peer_disk_state,
}});
}
else
{
# Not up yet.
my $next_check = $anvil->Get->date_and_time({offset => 5, time_only => 1});
my $variables = { next_check => $next_check };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0378", variables => $variables});
$anvil->Job->update_progress({
progress => 95,
message => "job_0378",
variables => $variables,
});
$dr_seen = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { dr_seen => $dr_seen }});
last;
}
}
if ($dr_seen)
{
# We're ready.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0379"});
$anvil->Job->update_progress({
progress => 98,
message => "job_0379",
});
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
# Done!
$variables = { server => $server_name };
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0380", variables => $variables});
$anvil->Job->update_progress({
progress => 100,
message => "job_0380",
variables => $variables,
});
return(0);
}
sub load_job
{
my ($anvil) = @_;
$anvil->Job->clear();
$anvil->Job->get_job_details();
$anvil->Job->update_progress({
progress => 1,
job_picked_up_by => $$,
job_picked_up_at => time,
message => "message_0263",
});
print "Loading the job: [".$anvil->data->{switches}{'job-uuid'}."]...\n";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"jobs::job_command" => $anvil->data->{jobs}{job_command},
"jobs::job_data" => $anvil->data->{jobs}{job_data},
"jobs::job_progress" => $anvil->data->{jobs}{job_progress},
"jobs::job_status" => $anvil->data->{jobs}{job_status},
}});
# Break up the job data into switches.
$anvil->data->{switches}{Yes} = 1;
print "- Set the switch: [--Yes] to true.\n";
foreach my $line (split/\n/, $anvil->data->{jobs}{job_data})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /(.*?)=(.*)$/)
{
my $key = $1;
my $value = $2;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
key => $key,
value => $value,
}});
$anvil->data->{switches}{$key} = $value;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"switches::${key}" => $anvil->data->{switches}{$key},
}});
print "* Set the switch: [--".$key."] to: [".$value."]\n";
}
}
print "Job loaded successfully.\n\n";
return(0);
}