|
|
|
#!/usr/bin/perl
|
|
|
|
#
|
|
|
|
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers.
|
|
|
|
#
|
|
|
|
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes
|
|
|
|
# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool
|
|
|
|
# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' ->
|
|
|
|
# 'host_uuid' table.
|
|
|
|
#
|
|
|
|
# Exit codes;
|
|
|
|
# 0 = Normal exit.
|
|
|
|
# 1 = Any problem that causes an early exit.
|
|
|
|
#
|
|
|
|
# TODO:
|
|
|
|
# - Add job support
|
|
|
|
# - Make this work on DR hosts.
|
|
|
|
# - 'pcs quorum unblock' could be useful in sole-survivor cold starts.
|
|
|
|
# - Start DRBD resources if the VMs are running already on the peer.
|
|
|
|
#
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use Anvil::Tools;
|
|
|
|
use NetAddr::IP;
|
|
|
|
require POSIX;
|
|
|
|
|
|
|
|
# Work out our own file name and the directory we were run from.
my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ".";
if (defined $THIS_FILE)
{
	# Quote the file name so that any regex metacharacters in it are matched literally.
	$running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
	$running_directory = "" if not defined $running_directory;
}
else
{
	# There is no '/' in '$0' (ie: we were run as 'perl <name>'), so '$0' is the file name itself and
	# the running directory is the current directory.
	$THIS_FILE = $0;
}
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Connect to the database(s). If no connection is available yet, we wait (below) until one is.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# Set the default (unset) value for each switch we understand, then read the command line.
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{disable}    = "";
$anvil->data->{switches}{enable}     = "";
$anvil->data->{switches}{force}      = "";
$anvil->data->{switches}{'local'}    = "";
$anvil->data->{switches}{status}     = "";
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
	'switches::disable'  => $anvil->data->{switches}{disable},
	'switches::enable'   => $anvil->data->{switches}{enable},
	'switches::force'    => $anvil->data->{switches}{force},
	'switches::local'    => $anvil->data->{switches}{'local'},
	'switches::status'   => $anvil->data->{switches}{status},
}});

# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections})
{
	# Report the problem once, then retry the connection every ten seconds until we have one.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});

	until($anvil->data->{sys}{database}{connections})
	{
		sleep 10;

		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Keep waiting
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, secure => 0, key => "log_0439"});
		}
	}
}

### Process
# 1. Check if I am enabled and that no other copies are running (also handles enable/disable/status requests).
# 2. Can I reach my peer on every network we share? Loop until true.
#    - Wait here indefinitely
# 3. Start pacemaker (if needed) and wait until this node is ready and quorate.
# 4. Boot the servers.
# 5. Start DRBD resources locally where the peer is waiting for us to connect.

# Check to see if we should run. Also checks/sets enable/disable requests.
prerun_checks($anvil);

# Wait until I can reach the peer on all the networks we share. This will not return until access is
# available on all of them. There is no timeout.
wait_for_access($anvil);

# Start pacemaker now.
start_pacemaker($anvil);

# Boot servers.
boot_servers($anvil);

# Start DRBD resources locally for VMs running on the peer already
check_drbd($anvil);

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});

$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
|
|
|
#############################################################################################################
|
|
|
|
# Functions #
|
|
|
|
#############################################################################################################
|
|
|
|
|
|
|
|
# This reads the DRBD status locally and on the peer (using the peer IP recorded in 'sys::peer_target_ip'). For
# each connection the peer reports as 'Connecting', where our own view of that connection is not
# 'StandAlone', the resource is brought up locally.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# Returns '0' in all cases.
sub check_drbd
{
	my ($anvil) = @_;

	# Find the servers running on the peer.
	my $short_host_name      = $anvil->Get->short_host_name();
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	my $peer_ip_address      = $anvil->data->{sys}{peer_target_ip};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
		peer_password        => $anvil->Log->is_secure($peer_password),
		peer_ip_address      => $peer_ip_address,
	}});

	# If no usable peer IP was recorded, there is no peer status to read and so nothing for us to bring
	# up. Bail out rather than using an undefined value as a call target and as a hash key.
	if ((not defined $peer_ip_address) or ($peer_ip_address eq ""))
	{
		return(0);
	}

	# Get the list of resources up locally, and then on the peer.
	$anvil->DRBD->get_status({debug => 2});
	$anvil->DRBD->get_status({
		debug    => 2,
		password => $peer_password,
		target   => $peer_ip_address,
	});

	foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}})
	{
		foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}})
		{
			# NOTE(review): 'peer_is_me' is recorded for debugging only; it is not part of the test
			#               below. Confirm whether connections to third hosts should be skipped.
			my $peer_is_me = $anvil->Network->is_local({host => $peer_name});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				resource   => $resource,
				peer_name  => $peer_name,
				peer_is_me => $peer_is_me,
			}});

			# The peer's view of this connection, and ours (an empty string if we have no record of it).
			my $peer_connection_state  = $anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}{$peer_name}{'connection-state'};
			my $local_connection_state = exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} : "";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				peer_connection_state  => $peer_connection_state,
				local_connection_state => $local_connection_state,
			}});

			if (($peer_connection_state =~ /Connecting/i) && ($local_connection_state !~ /StandAlone/i))
			{
				# Start the DRBD resource locally.
				my $return_code = $anvil->DRBD->manage_resource({
					debug    => 2,
					resource => $resource,
					task     => "up",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
			}
		}
	}

	return(0);
}
|
|
|
|
|
|
|
|
# Boots every server by running 'anvil-boot-server --server all' (with the current logging switches appended).
# If that call returns a non-zero code, the failure is logged and the program exits with '1'. Otherwise, '0'
# is returned.
sub boot_servers
{
	my ($anvil) = @_;

	# Tell the user what we're about to do.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0614"});

	# Assemble the call from the program path and the logging switches.
	my $program      = $anvil->data->{path}{exe}{'anvil-boot-server'};
	my $log_switches = $anvil->Log->switches;
	my $boot_call    = $program." --server all".$log_switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $boot_call }});

	my ($call_output, $exit_status) = $anvil->System->call({shell_call => $boot_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $call_output,
		return_code => $exit_status,
	}});

	# A zero return code means the boot call worked, so we're done.
	if (not $exit_status)
	{
		return(0);
	}

	# The call failed; report what we got back and give up.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0275", variables => {
		output      => $call_output,
		return_code => $exit_status,
	}});
	$anvil->nice_exit({exit_code => 1});

	return(0);
}
|
|
|
|
|
|
|
|
# Starts pacemaker on this node, unless the CIB can already be parsed, and then waits until this node reports
# as ready and the cluster is quorate. Before starting, it waits until our short host name resolves to one of
# our own IPs. If 'pcs cluster start' fails, the program exits with '1'. Returns '0' otherwise.
sub start_pacemaker
{
	my ($anvil) = @_;

	my $anvil_uuid           = $anvil->data->{sys}{anvil_uuid};
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $fenced_peer          = 0;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		anvil_uuid           => $anvil_uuid,
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
	}});

	# If the CIB parses without a problem, pacemaker is already running and there is nothing to do.
	my ($cib_problem) = $anvil->Cluster->parse_cib({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $cib_problem }});
	if (not $cib_problem)
	{
		return(0);
	}

	# Pacemaker isn't running, so we'll start it.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"});

	# NOTE: In some odd cases, this can try to run before /etc/hosts has been populated. So we loop here
	#       until our own short host name resolves to an IP that we hold.
	my $resolves_to_us = 0;
	until ($resolves_to_us)
	{
		# Convert our short host name to an IP.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0681"});
		my $resolved_ip = $anvil->Convert->host_name_to_ip({debug => 2, host_name => $short_host_name});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_bcn1_ip => $resolved_ip }});

		if ($resolved_ip)
		{
			# Throw away any stale record of our IPs and read them fresh, then see if the resolved
			# IP is one of them.
			if (exists $anvil->data->{network}{$short_host_name})
			{
				delete $anvil->data->{network}{$short_host_name};
			}
			$anvil->Network->get_ips();

			foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
			{
				# Interfaces without an IP, and the loopback range, are of no interest.
				my $interface_ip = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
				next if (($interface_ip eq "") or ($interface_ip =~ /^127\.0\.0\./));

				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					's1:interface' => $interface,
					's2:this_ip'   => $interface_ip,
				}});
				if ($interface_ip eq $resolved_ip)
				{
					$resolves_to_us = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ok => $resolves_to_us }});
					last;
				}
			}
		}

		if (not $resolves_to_us)
		{
			# Not yet; wait ten seconds before checking again.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0134"});
			sleep 10;
		}
	}

	### TODO: A lot more testing is needed for degraded single-node start later.
	###       Should we use --all, or wait for our peer? For now, we wait (so only this node is started).
	my $start_call = $anvil->data->{path}{exe}{pcs}." cluster start";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $start_call }});

	my ($start_output, $start_code) = $anvil->System->call({shell_call => $start_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $start_output,
		return_code => $start_code,
	}});
	if ($start_code)
	{
		# The start call failed, so there is nothing more we can do.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0256", variables => {
			output      => $start_output,
			return_code => $start_code,
		}});
		$anvil->nice_exit({exit_code => 1});
	}

	### TODO: We may implement the logic to fence our peer (similar to cman's 'post_join_delay' logic) at a
	###       later time; after two minutes without quorum, fence the peer and, if that succeeds, unblock
	###       quorum. For now, we wait forever. This is why 'wait_for_peer' is set, even though nothing
	###       acts on it yet.
	my $start_time    = time;
	my $wait_for_peer = $start_time + 120;
	my $still_waiting = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		start_time    => $start_time,
		wait_for_peer => $wait_for_peer,
	}});
	while ($still_waiting)
	{
		# Assume we're done; any check below that isn't satisfied sets this back to '1'.
		$still_waiting = 0;

		my ($cib_unreadable) = $anvil->Cluster->parse_cib({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $cib_unreadable }});
		if (not $cib_unreadable)
		{
			# Quorum, as reported in the CIB, sets 'have-quorum' to '1' as soon as it starts, then
			# retracts it. For this reason, we use 'parse_quorum()' to get the quorum directly from
			# corosync/votequorum.
			my ($quorum_problem) = $anvil->Cluster->parse_quorum({debug => 2});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $quorum_problem }});
			if (not $quorum_problem)
			{
				### NOTE: We don't worry about maintenance mode yet, as it shouldn't apply, but
				###       we may change that view later.
				# Pull out our own node's state.
				my $node_name        = $anvil->data->{cib}{parsed}{'local'}{name};
				my $maintenance_mode = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'maintenance-mode'};
				my $in_ccm           = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm};
				my $crmd             = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd};
				my $join             = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'};
				my $ready            = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready};
				my $quorate          = $anvil->data->{quorum}{quorate};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					's1:node_name'        => $node_name,
					's2:maintenance_mode' => $maintenance_mode,
					's3:in_ccm/crmd/join' => $in_ccm."/".$crmd."/".$join,
					's4:ready'            => $ready,
					's5:quorate'          => $quorate,
				}});

				if (not $ready)
				{
					# We're not online yet.
					$still_waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $still_waiting }});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0612", variables => {
						node_name => $node_name,
						in_ccm    => $in_ccm,
						crmd      => $crmd,
						'join'    => $join,
					}});
				}
				elsif ($quorate)
				{
					# Ready and quorate, we're done.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0611", variables => { node_name => $node_name }});
				}
				else
				{
					# Ready, but without quorum.
					$still_waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $still_waiting }});

					# Keep waiting, or fence the peer?
					if (time > $wait_for_peer)
					{
						### TODO: See above, not implemented yet. Do we want to do this? If so;
						###       Time to fence. Use 'pcs stonith fence <peer>', verify it
						###       succeeded, then do 'pcs quorum unblock --force' to finish
						###       startup.
					}
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0610", variables => { node_name => $node_name }});
				}
			}
			else
			{
				# Corosync is down.
				$still_waiting = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $still_waiting }});
			}
		}
		else
		{
			# Can't parse the CIB yet, wait.
			$still_waiting = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $still_waiting }});
		}

		if ($still_waiting)
		{
			# Pause for five seconds before the next pass.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0609"});
			sleep 5;
		}
	}

	return(0);
}
|
|
|
|
|
|
|
|
# For each of our interfaces (as selected by 'Network->is_our_interface'), this finds the peer interface on the
# same subnet and tests access to the peer over it. The last peer IP that passed the access test is recorded in
# 'sys::peer_target_ip'. If any test fails, we sleep for five seconds and start over. There is no timeout.
#
# Parameters;
#   $anvil - The Anvil::Tools handle.
#
# Returns '0' once no access test has failed in a pass.
#
# NOTE(review): If no interface finds a matching peer interface, nothing is tested and this returns on the
#               first pass without setting 'sys::peer_target_ip'. Confirm that is the desired behaviour.
sub wait_for_access
{
	my ($anvil) = @_;

	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
		peer_password        => $anvil->Log->is_secure($peer_password),
	}});

	my $waiting = 1;
	while ($waiting)
	{
		# This will get set back to '1' if access to the peer fails on any network.
		$waiting = 0;

		# Load IPs (again, to catch changes that might be delaying startup)
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $short_host_name,
			host_uuid => $host_uuid,
		});
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $peer_short_host_name,
			host_uuid => $peer_host_uuid,
		});

		# Loop through our interfaces and then loop our peers. Test access over them and set
		# 'waiting' back to '1' if the connection fails.
		foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				interface => $interface,
				waiting   => $waiting,
			}});

			# Once one network has failed, there's no point testing the rest on this pass.
			next if $waiting;

			# Only care about our networks.
			if (not $anvil->Network->is_our_interface({interface => $interface}))
			{
				# Not an interface we care about
				next;
			}

			my $this_network = ($interface =~ /^(.*?)_/)[0];
			my $ip_address   = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
			my $subnet_mask  = $anvil->data->{network}{$short_host_name}{interface}{$interface}{subnet_mask};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:this_network' => $this_network,
				's2:ip_address'   => $ip_address,
				's3:subnet_mask'  => $subnet_mask,
			}});

			# Without an IP and subnet mask, there is no network to match against the peer.
			next if ((not $ip_address) or (not $subnet_mask));

			### NOTE: I know I could match interface names, but that's not certain enough. It's
			###       possible (if unlikely) that the network name+number differs on our peer. So
			###       this is safer.
			# 'NetAddr::IP->new()' returns 'undef' if it can't parse the address. Comparing against
			# that would not be a meaningful subnet test, so skip this interface.
			my $local_network = NetAddr::IP->new($ip_address."/".$subnet_mask);
			next if not defined $local_network;

			# Loop through my peer's interfaces and see if we're sharing this one.
			my $peer_match_found = 0;
			foreach my $peer_interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$peer_short_host_name}{interface}})
			{
				last if $peer_match_found;
				my $peer_ip_address  = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{ip};
				my $peer_subnet_mask = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{subnet_mask};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					peer_interface   => $peer_interface,
					peer_ip_address  => $peer_ip_address,
					peer_subnet_mask => $peer_subnet_mask,
				}});

				# A peer interface with no IP or subnet mask can't be on our network.
				next if ((not $peer_ip_address) or (not $peer_subnet_mask));

				# This the matching network?
				next if $subnet_mask ne $peer_subnet_mask;

				# Skip peer addresses that can't be parsed, rather than dying on the method call.
				my $peer_network = NetAddr::IP->new($peer_ip_address."/".$peer_subnet_mask);
				next if not defined $peer_network;

				if ($peer_network->within($local_network))
				{
					# Match, test access.
					$peer_match_found = 1;
					my $access = $anvil->Remote->test_access({
						target   => $peer_ip_address,
						password => $peer_password,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
					if ($access)
					{
						# This network is good.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0604", variables => {
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address,
						}});

						# Remember this IP so that later steps can reach the peer.
						$anvil->data->{sys}{peer_target_ip} = $peer_ip_address;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
							"sys::peer_target_ip" => $anvil->data->{sys}{peer_target_ip},
						}});
					}
					else
					{
						# No access, wait and try it again.
						$waiting = 1;
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0605", variables => {
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address,
						}});
					}
				}
			}
		}

		if ($waiting)
		{
			# At least one network failed, pause before the next pass.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0606", variables => { peer => $peer_short_host_name }});
			sleep 5;
		}
	}

	# All networks are up.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0607", variables => { peer => $peer_short_host_name }});

	return(0);
}
|
|
|
|
|
|
|
|
# This decides whether the rest of the program should run. It exits (with '0') if this host is not a node, is
# not in an Anvil!, was only asked to enable/disable the tool or report its status, if another copy is already
# running, or if the tool is disabled and '--force' wasn't used. Along the way it records 'sys::anvil_uuid',
# 'sys::peer_host_uuid' and 'sys::peer_password'. It returns '0' when the caller should proceed.
sub prerun_checks
{
	my ($anvil) = @_;

	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();

	# What kind of machine are we?
	my $host_uuid = $anvil->Get->host_uuid();
	my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_uuid => $host_uuid,
		host_type => $host_type,
	}});
	if ($host_type ne "node")
	{
		# Only nodes run this, so we're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
		$anvil->nice_exit({exit_code => 0});
	}

	# Which Anvil! are we in, if any?
	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	if (not $anvil_uuid)
	{
		# This is a node, but not in an Anvil! yet.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
		$anvil->nice_exit({exit_code => 0});
	}

	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		node1_host_uuid => $node1_host_uuid,
		node2_host_uuid => $node2_host_uuid,
	}});

	# Record the Anvil!, our peer (whichever of the two nodes isn't us) and the password used to reach it.
	$anvil->data->{sys}{anvil_uuid}     = $anvil_uuid;
	$anvil->data->{sys}{peer_host_uuid} = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
	$anvil->data->{sys}{peer_password}  = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		"sys::anvil_uuid"     => $anvil->data->{sys}{anvil_uuid},
		"sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid},
		"sys::peer_password"  => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}),
	}});

	# Are we being asked to enable or disable? If both are given, enable wins. The message used depends on
	# whether only this node ('--local') or both nodes are being changed.
	my $local_only = $anvil->data->{switches}{'local'};
	my $new_value  = 1;
	my $result_key = "";
	if ($anvil->data->{switches}{enable})
	{
		$result_key = $local_only ? "log_0599" : "log_0600";
	}
	elsif ($anvil->data->{switches}{disable})
	{
		$new_value  = 0;
		$result_key = $local_only ? "log_0601" : "log_0602";
	}

	# If we're updating the settings, do so and then exit.
	if ($result_key)
	{
		# Always update ourself, and the peer as well unless told to stay local.
		my @target_uuids = ($host_uuid);
		if (not $local_only)
		{
			push @target_uuids, $anvil->data->{sys}{peer_host_uuid};
		}
		foreach my $target_uuid (@target_uuids)
		{
			my ($variable_uuid) = $anvil->Database->insert_or_update_variables({
				debug                 => 3,
				variable_name         => "tool::anvil-safe-start::enabled",
				variable_value        => $new_value,
				variable_default      => 1,
				variable_description  => "striker_0286",
				variable_section      => "system",
				variable_source_uuid  => $target_uuid,
				variable_source_table => "hosts",
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
		}

		# Report the change that was made.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $result_key});
		$anvil->nice_exit({exit_code => 0});
	}

	# Read our own 'enabled' setting.
	my ($local_enabled, $local_variable_uuid) = $anvil->Database->read_variable({
		debug                 => 3,
		variable_name         => "tool::anvil-safe-start::enabled",
		variable_source_table => "hosts",
		variable_source_uuid  => $host_uuid,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		local_enabled => $local_enabled,
		variable_uuid => $local_variable_uuid,
	}});
	if (not $local_variable_uuid)
	{
		# No UUID means the value hasn't been recorded, so we default to 1.
		$local_enabled = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }});
	}

	# Have we just been asked for the status?
	if ($anvil->data->{switches}{status})
	{
		# Yes, so read the peer's setting as well.
		my ($peer_enabled, $peer_variable_uuid) = $anvil->Database->read_variable({
			debug                 => 3,
			variable_name         => "tool::anvil-safe-start::enabled",
			variable_source_table => "hosts",
			variable_source_uuid  => $anvil->data->{sys}{peer_host_uuid},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			peer_enabled  => $peer_enabled,
			variable_uuid => $peer_variable_uuid,
		}});
		if (not $peer_variable_uuid)
		{
			# As above, an unrecorded value defaults to 1.
			$peer_enabled = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }});
		}

		# What we tell the user depends on which nodes are enabled.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			local_enabled => $local_enabled,
			peer_enabled  => $peer_enabled,
		}});

		# The keys below are '<local>/<peer>', where '1' is enabled and '0' is disabled.
		my %status_key_for = (
			"1/1" => "message_0227",
			"0/0" => "message_0228",
			"1/0" => "message_0229",
			"0/1" => "message_0230",
		);
		my $state      = ($local_enabled ? 1 : 0)."/".($peer_enabled ? 1 : 0);
		my $status_key = $status_key_for{$state};
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $status_key});
		$anvil->nice_exit({exit_code => 0});
	}

	# Is another instance running?
	my $pids = $anvil->System->pids({
		debug        => 3,
		ignore_me    => 1,
		program_name => $THIS_FILE,
	});
	my $other_instances = scalar(@{$pids});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});
	if ($other_instances)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233"});
		$anvil->nice_exit({exit_code => 0});
	}

	# Last test, enabled or forced?
	if ($local_enabled)
	{
		# Enabled, carry on.
		return(0);
	}
	if ($anvil->data->{switches}{force})
	{
		# Disabled, but forced, so run anyway.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"});
		return(0);
	}

	# Disabled and not forced, exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"});
	$anvil->nice_exit({exit_code => 0});

	return(0);
}
|