You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
789 lines
30 KiB
789 lines
30 KiB
#!/usr/bin/perl |
|
# |
|
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers. |
|
# |
|
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes |
|
# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool |
|
# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' -> |
|
# 'host_uuid' table. |
|
# |
|
# Exit codes; |
|
# 0 = Normal exit. |
|
# 1 = Any problem that causes an early exit. |
|
# |
|
# TODO: |
|
# - Add job support |
|
# - Make this work on DR hosts. |
|
# - 'pcs quorum unblock' could be useful in sole-survivor cold starts. |
|
# - Start DRBD resources if the VMs are running already on the peer. |
|
# - Check that the installed kernel-headers matches the running kernel and, if not, check with grubby to |
|
# ensure the right kernel is set to boot. Then alert the user to a likely need to reboot. |
|
# |
|
|
|
use strict; |
|
use warnings; |
|
use Anvil::Tools; |
|
use NetAddr::IP; |
|
require POSIX; |
|
|
|
# Work out the program name and the directory it was run from. When $0 has no directory part (for example,
# when run as 'perl anvil-safe-start'), fall back to $0 and '.' so that neither value is left undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if (not defined $THIS_FILE)
{
	$THIS_FILE = $0;
}
# The program name is quoted so that any regex metacharacters in it are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Read switches. No program-specific switches are accepted (the list is empty).
$anvil->Get->switches({list => [], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# NOTE: The log level and secure-logging setting are deliberately not overridden here. A debug-only block
#       that forced 'level => 2' and 'secure => 1' on every run was marked for removal and has been dropped.

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# If dnf is running, hold.
$anvil->System->wait_on_dnf();

# Try to connect to the database(s).
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# If I have no databases, sleep until I do.
if (not $anvil->data->{sys}{database}{connections})
{
	# Report the problem once, then retry every ten seconds until at least one connection is available.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});

	until ($anvil->data->{sys}{database}{connections})
	{
		sleep 10;

		# Refresh before reconnecting, then try again.
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Keep waiting
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, secure => 0, key => "log_0439"});
		}
	}
}

### Process
# 1. Run the pre-run checks (host type, Anvil! membership, no other copies running).
# 2. Wait until the peer can be reached on all shared networks.
#    - Wait here indefinitely.
# 3. Start pacemaker and wait until this node is ready and quorate.
# 4. Boot the servers.
# 5. Bring up local DRBD resources that the peer is waiting to connect to.

# Check to see if we should run.
prerun_checks($anvil);

# Wait until I can ping the peer on all networks. This will not return until access is available on all
# networks. There is no timeout.
wait_for_access($anvil);

# Start pacemaker now.
start_pacemaker($anvil);

# Boot servers.
boot_servers($anvil);

# Start DRBD resources locally for VMs running on the peer already
check_drbd($anvil);

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});

$anvil->nice_exit({exit_code => 0});
|
|
|
############################################################################################################# |
|
# Functions # |
|
############################################################################################################# |
|
|
|
# This compares the DRBD connection states seen by the peer with our own and, for each of the peer's
# connections that is 'Connecting' while our matching connection is not 'StandAlone', brings the resource up
# locally.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# Reads 'sys::peer_host_uuid', 'sys::peer_password' and 'sys::peer_target_ip' (set by 'prerun_checks()' and
# 'wait_for_access()'). Always returns '0'.
sub check_drbd
{
	my ($anvil) = @_;

	# Find the servers running on the peer.
	my $short_host_name      = $anvil->Get->short_host_name();
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	my $peer_ip_address      = $anvil->data->{sys}{peer_target_ip};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		's1:short_host_name'      => $short_host_name,
		's2:peer_host_uuid'       => $peer_host_uuid,
		's3:peer_short_host_name' => $peer_short_host_name,
		's4:peer_password'        => $anvil->Log->is_secure($peer_password),
		's5:peer_ip_address'      => $peer_ip_address,
	}});

	# Get the local DRBD status.
	$anvil->DRBD->get_status({debug => 2});

	# 'sys::peer_target_ip' is only set once a peer address has passed the access test. Without it there
	# is no peer to query, and using an undefined value as a hash key below would only generate
	# warnings, so we're done.
	if ((not defined $peer_ip_address) or ($peer_ip_address eq ""))
	{
		return(0);
	}

	# Get the list of resources up on the peer.
	$anvil->DRBD->get_status({
		debug    => 2,
		password => $peer_password,
		target   => $peer_ip_address,
	});

	# The peer's status is stored under the IP we used to reach it, ours under our short host name.
	my $peer_resources = $anvil->data->{drbd}{status}{$peer_ip_address}{resource};
	foreach my $resource (sort {$a cmp $b} keys %{$peer_resources})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
		foreach my $peer_name (sort {$a cmp $b} keys %{$peer_resources->{$resource}{connection}})
		{
			# NOTE: 'peer_is_me' is only logged for now, it doesn't influence the decision below.
			my $peer_is_me = $anvil->Network->is_local({host => $peer_name});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:peer_name'  => $peer_name,
				's2:peer_is_me' => $peer_is_me,
			}});

			# If we have no record of this connection locally, treat our state as an empty string.
			my $peer_connection_state  = $peer_resources->{$resource}{connection}{$peer_name}{'connection-state'};
			my $local_connection_state = "";
			if (exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'})
			{
				$local_connection_state = $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'};
			}
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:peer_connection_state'  => $peer_connection_state,
				's2:local_connection_state' => $local_connection_state,
			}});

			if (($peer_connection_state =~ /Connecting/i) && ($local_connection_state !~ /StandAlone/i))
			{
				# Start the DRBD resource locally.
				my $return_code = $anvil->DRBD->manage_resource({
					debug    => 2,
					resource => $resource,
					task     => "up",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
			}
		}
	}

	return(0);
}
|
|
|
# Boots all servers by running 'anvil-boot-server --server all' with the current logging switches appended.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# If the call returns a non-zero return code, the error is logged and the program exits with '1'.
# Otherwise '0' is returned.
sub boot_servers
{
	my ($anvil) = @_;

	# Tell the user what we're about to do.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0614"});

	# Build and run the call.
	my $command = $anvil->data->{path}{exe}{'anvil-boot-server'};
	   $command .= " --server all";
	   $command .= $anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $command }});

	my ($stdout, $exit_status) = $anvil->System->call({shell_call => $command});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $stdout,
		return_code => $exit_status,
	}});

	# A return code of zero means there's nothing more to do.
	if (not $exit_status)
	{
		return(0);
	}

	# What?! Fail out, we're done.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0275", variables => {
		output      => $stdout,
		return_code => $exit_status,
	}});
	$anvil->nice_exit({exit_code => 1});

	return(0);
}
|
|
|
# Start pacemaker and wait until we're quorate.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# If the CIB can already be parsed, the cluster is taken to be running and nothing is done. Otherwise this
# waits until our own short host name resolves to one of our IPs, calls 'pcs cluster start' (retrying for up
# to two minutes, then exiting with '1'), and finally waits, without a timeout, until this node is ready and
# quorate. Always returns '0'.
sub start_pacemaker
{
	my ($anvil) = @_;

	my $anvil_uuid           = $anvil->data->{sys}{anvil_uuid};
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		anvil_uuid           => $anvil_uuid,
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
	}});

	# Is pacemaker already running?
	my ($problem) = $anvil->Cluster->parse_cib({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	if (not $problem)
	{
		# The CIB was read, so there's nothing to start.
		return(0);
	}

	# Nope, start it.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"});

	_wait_for_own_host_name($anvil, $short_host_name);
	_start_cluster($anvil);
	_wait_for_quorum($anvil);

	return(0);
}

# Private helper for 'start_pacemaker()'. This loops, sleeping ten seconds between tries, until our short
# host name converts to an IP address that is assigned to one of our (non-loopback) interfaces.
sub _wait_for_own_host_name
{
	my ($anvil, $short_host_name) = @_;

	# NOTE: In some odd cases, this can try to run before /etc/hosts has been populated. So wait
	#       until we can access ourself.
	my $ok = 0;
	until ($ok)
	{
		# Convert our short host name to an IP and verify that the IP is one of ours.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0681"});
		my $local_bcn1_ip = $anvil->Convert->host_name_to_ip({debug => 2, host_name => $short_host_name});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_bcn1_ip => $local_bcn1_ip }});

		if ($local_bcn1_ip)
		{
			# Is this one of our IPs, or is DNS pointing somewhere else? Drop any previously
			# collected network data first so that we compare against the current IPs.
			if (exists $anvil->data->{network}{$short_host_name})
			{
				delete $anvil->data->{network}{$short_host_name};
			}
			$anvil->Network->get_ips();

			foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
			{
				# Skip interfaces without an IP and loopback addresses.
				my $this_ip = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
				next if ((not defined $this_ip) or ($this_ip eq ""));
				next if $this_ip =~ /^127\.0\.0\./;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					's1:interface' => $interface,
					's2:this_ip'   => $this_ip,
				}});
				if ($this_ip eq $local_bcn1_ip)
				{
					$ok = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ok => $ok }});
					last;
				}
			}
		}

		if (not $ok)
		{
			# Sleep 10 seconds.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0134"});
			sleep 10;
		}
	}

	return(0);
}

# Private helper for 'start_pacemaker()'. This calls 'pcs cluster start' until it returns '0'. Failures are
# retried every ten seconds; if the call is still failing more than 120 seconds after the first try, the
# error is logged and the program exits with '1'.
sub _start_cluster
{
	my ($anvil) = @_;

	### TODO: A lot more testing is needed for degraded single-node start later.
	###       Should we use --all, or wait for our peer? For now, we wait.
	### NOTE: This can be racy during initial setup, calling the start before /etc/hosts is
	###       populated. So this watches for that corner case.
	my $wait_until = time + 120;
	my $waiting    = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		wait_until => $wait_until,
		waiting    => $waiting,
	}});
	while ($waiting)
	{
		#my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all";
		my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});

		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});

		if (not $return_code)
		{
			# Success!
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
		elsif (time > $wait_until)
		{
			# We're done waiting, give up.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0256", variables => {
				output      => $output,
				return_code => $return_code,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		else
		{
			# Report the error and sleep
			my $time_left = $wait_until - time;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "alert", key => "warning_0153", variables => {
				output      => $output,
				return_code => $return_code,
				time_left   => $time_left,
				waiting     => 10,
			}});
			sleep 10;
		}
	}

	return(0);
}

# Private helper for 'start_pacemaker()'. This checks every five seconds until the CIB and the quorum data
# can both be parsed, the local node's 'ready' state is set and 'quorum::quorate' is set. There is no
# timeout.
sub _wait_for_quorum
{
	my ($anvil) = @_;

	### TODO: We may implement the logic to fence our peer (similar to cman's 'post_join_delay'
	###       logic) at a later time. For now, we wait forever. This is why 'wait_for_peer' is
	###       calculated and logged, even though nothing acts on it yet.
	my $start_time    = time;
	my $wait_for_peer = $start_time + 120;
	my $waiting       = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		start_time    => $start_time,
		wait_for_peer => $wait_for_peer,
	}});
	while ($waiting)
	{
		# Assume we're done, the checks below set this back to '1' if anything isn't ready.
		$waiting = 0;

		my ($cib_problem) = $anvil->Cluster->parse_cib({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $cib_problem }});
		if ($cib_problem)
		{
			# Can't parse the CIB yet, wait.
			$waiting = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
		else
		{
			# Quorum, as reported in the CIB, sets 'have-quorum' to '1' as soon as it starts,
			# then retracts it. For this reason, we use 'parse_quorum()' to get the quorum
			# directly from corosync/votequorum.
			my ($quorum_problem) = $anvil->Cluster->parse_quorum({debug => 2});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $quorum_problem }});
			if ($quorum_problem)
			{
				# Corosync is down.
				$waiting = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
			}
			else
			{
				### NOTE: We don't worry about maintenance mode yet, as it shouldn't
				###       apply, but we may change that view later.
				# See where we are.
				my $node_name = $anvil->data->{cib}{parsed}{'local'}{name};
				   $node_name = "" if not defined $node_name;
				my $maintenance_mode = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'maintenance-mode'};
				my $in_ccm           = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm};
				my $crmd             = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd};
				my $join             = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'};
				my $ready            = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready};
				my $quorate          = $anvil->data->{quorum}{quorate};

				# NOTE(review): Presumably some of these can be unset while the cluster is
				#               still coming up. Treat missing values as empty so that the
				#               string concatenation below doesn't warn.
				foreach my $value ($maintenance_mode, $in_ccm, $crmd, $join, $ready, $quorate)
				{
					$value = "" if not defined $value;
				}
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					's1:node_name'        => $node_name,
					's2:maintenance_mode' => $maintenance_mode,
					's3:in_ccm/crmd/join' => $in_ccm."/".$crmd."/".$join,
					's4:ready'            => $ready,
					's5:quorate'          => $quorate,
				}});

				# Are we online?
				if (not $ready)
				{
					# Not ready yet.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0612", variables => {
						node_name => $node_name,
						in_ccm    => $in_ccm,
						crmd      => $crmd,
						'join'    => $join,
					}});
				}
				elsif ($quorate)
				{
					# We're ready and we have quorum.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0611", variables => { node_name => $node_name }});
				}
				else
				{
					# We're ready, but not quorate yet.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});

					# Keep waiting, or fence the peer?
					if (time > $wait_for_peer)
					{
						### TODO: See above, not implemented yet. Do we want to do this? If so:
						# Time to fence. Use 'pcs stonith fence <peer>', verify it succeeded,
						# then do 'pcs quorum unblock --force' to finish startup.
					}
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0610", variables => { node_name => $node_name }});
				}
			}
		}

		if ($waiting)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0609"});
			sleep 5;
		}
	}

	return(0);
}
|
|
|
# Check for which networks we have and verify that we can access our peer on each. For each of our
# interfaces, the peer's interface in the same subnet is found and access to that IP is tested. This loops,
# sleeping five seconds between passes, until a pass completes without a failed access test.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# Each peer IP that passes the access test is stored in 'sys::peer_target_ip', so the last one tested is
# the one left there. Always returns '0'.
#
# NOTE(review): If no interface of ours shares a subnet with any peer interface, nothing is tested and this
#               returns right away with 'sys::peer_target_ip' unset. Confirm whether that is intended.
sub wait_for_access
{
	my ($anvil) = @_;

	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
		peer_password        => $anvil->Log->is_secure($peer_password),
	}});

	my $waiting = 1;
	while ($waiting)
	{
		# This will get set back to '1' if access to the peer fails on any network.
		$waiting = 0;

		# Load IPs (again, to catch changes that might be delaying startup)
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $short_host_name,
			host_uuid => $host_uuid,
		});
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $peer_short_host_name,
			host_uuid => $peer_host_uuid,
		});

		# Loop through our interfaces and then loop our peers. Test access over them and set
		# 'waiting' back to '1' if the connection fails.
		foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				interface => $interface,
				waiting   => $waiting,
			}});

			# Once one network has failed, there's no point testing the rest on this pass.
			next if $waiting;

			# Only care about our networks.
			if (not $anvil->Network->is_our_interface({interface => $interface}))
			{
				# Not an interface we care about
				next;
			}

			my $this_network = ($interface =~ /^(.*?)_/)[0];
			my $ip_address   = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
			my $subnet_mask  = $anvil->data->{network}{$short_host_name}{interface}{$interface}{subnet_mask};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:this_network' => $this_network,
				's2:ip_address'   => $ip_address,
				's3:subnet_mask'  => $subnet_mask,
			}});

			### NOTE: I know I could match interface names, but that's not certain enough. It's
			###       possible (if unlikely) that the network name+number differs on our peer. So
			###       this is safer.
			# Loop through my peer's interfaces and see if we're sharing this one.
			my $local_network = NetAddr::IP->new($ip_address."/".$subnet_mask);
			if (not defined $local_network)
			{
				# 'new()' returns undef when the address can't be parsed, in which case there
				# is no network here to match against.
				next;
			}

			my $peer_match_found = 0;
			foreach my $peer_interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$peer_short_host_name}{interface}})
			{
				last if $peer_match_found;
				my $peer_ip_address  = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{ip};
				my $peer_subnet_mask = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{subnet_mask};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					peer_interface   => $peer_interface,
					peer_ip_address  => $peer_ip_address,
					peer_subnet_mask => $peer_subnet_mask,
				}});

				# This the matching network?
				next if $subnet_mask ne $peer_subnet_mask;

				# Skip peer addresses that can't be parsed. Calling 'within()' on an undefined
				# value would be fatal.
				my $peer_network = NetAddr::IP->new($peer_ip_address."/".$peer_subnet_mask);
				next if not defined $peer_network;
				next if not $peer_network->within($local_network);

				# Match, test access.
				$peer_match_found = 1;
				my $access = $anvil->Remote->test_access({
					target   => $peer_ip_address,
					password => $peer_password,
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
				if ($access)
				{
					# This network is good.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0604", variables => {
						peer    => $peer_short_host_name,
						network => $this_network,
						peer_ip => $peer_ip_address,
					}});

					# Remember an IP we know we can reach the peer on.
					$anvil->data->{sys}{peer_target_ip} = $peer_ip_address;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
						"sys::peer_target_ip" => $anvil->data->{sys}{peer_target_ip},
					}});
				}
				else
				{
					# No access, wait and try it again.
					$waiting = 1;
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0605", variables => {
						peer    => $peer_short_host_name,
						network => $this_network,
						peer_ip => $peer_ip_address,
					}});
				}
			}
		}

		if ($waiting)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0606", variables => { peer => $peer_short_host_name }});
			sleep 5;
		}
	}

	# All networks are up.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0607", variables => { peer => $peer_short_host_name }});

	return(0);
}
|
|
|
# This checks to verify that we're a node in an Anvil! and that no other copy of this program is running.
# If this isn't a node, the node isn't in an Anvil! yet, or another instance is found, the program exits
# with '0'.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# On success, 'sys::anvil_uuid', 'sys::peer_host_uuid' and 'sys::peer_password' are set and '0' is
# returned.
#
# NOTE: Enabling, disabling and reporting the status of this tool used to be handled here. That code was
#       disabled (it's managed by the 'anvil-safe-start.service' daemon now) and has been removed.
sub prerun_checks
{
	my ($anvil) = @_;

	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();

	# If this host isn't recorded yet, the type will be undefined. Treat that as "not a node".
	my $host_uuid = $anvil->Get->host_uuid();
	my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	   $host_type = "" if not defined $host_type;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_uuid => $host_uuid,
		host_type => $host_type,
	}});

	if (($host_type eq "node") or ($host_type eq "dr"))
	{
		# Call DRBD->get_status because, if we're just starting up and the kernel module needs to be
		# built, do it before we start calling scan agents.
		$anvil->DRBD->get_status({debug => 2});
	}

	if ($host_type ne "node")
	{
		# We're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
		$anvil->nice_exit({exit_code => 0});
	}

	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});

	if (not $anvil_uuid)
	{
		# This is a node, but not in an Anvil! yet.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
		$anvil->nice_exit({exit_code => 0});
	}

	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		node1_host_uuid => $node1_host_uuid,
		node2_host_uuid => $node2_host_uuid,
	}});

	# Our peer is whichever of the two nodes we are not. Record it, along with the Anvil! password, for
	# the later steps.
	$anvil->data->{sys}{anvil_uuid}     = $anvil_uuid;
	$anvil->data->{sys}{peer_host_uuid} = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
	$anvil->data->{sys}{peer_password}  = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"sys::anvil_uuid"     => $anvil->data->{sys}{anvil_uuid},
		"sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid},
		"sys::peer_password"  => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}),
	}});

	# Is another instance running?
	my $pids = $anvil->System->pids({
		debug        => 3,
		ignore_me    => 1,
		program_name => $THIS_FILE,
	});
	my $other_instances = @{$pids};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});

	if ($other_instances)
	{
		# Another copy is running, so leave it to do the work.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233", variables => { program => $THIS_FILE }});
		sleep 2;
		$anvil->nice_exit({exit_code => 0});
	}

	return(0);
}
|
|
|