41cd1e0319
* DRBD is now configured to a ping-timeout of 3 seconds. * Created Log->switches() that returnes the command line switches used by Anvil! tool command line calls based on the active log levels / secure logging. Appended this to all invocations of our tools. * Updated Database->resync_databases() to now only skip 'jobs' and 'variables' tables with less than 10 record differences. All other differences will trigger a resync. * Created System->_check_anvil_conf() that, as you might guess, checks in anvil.conf exists and created it (using defaults), if not. It also checks to see if the 'admin' group and user exists and creates them, if not. * Updated anvil-daemon to check anvil.conf on start up and in each loop. Created the function check_journald() that checks (and sets, if needed) that journald logging is persistent. * Made striker-manage-peers to check_if_configured on the Database->connect() when updating anvil.conf and the target UUID is the local machine. Also created a loop to make the reconnection a lot more robust. Signed-off-by: Digimer <digimer@alteeve.ca>
701 lines
26 KiB
Perl
Executable File
701 lines
26 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers.
|
|
#
|
|
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes
|
|
# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool
|
|
# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' ->
|
|
# 'host_uuid' table.
|
|
#
|
|
# Exit codes;
|
|
# 0 = Normal exit.
|
|
# 1 = Any problem that causes an early exit.
|
|
#
|
|
# TODO:
|
|
# - Add job support
|
|
# - Make this work on DR hosts.
|
|
# - 'pcs quorum unblock' could be useful in sole-survivor cold starts.
|
|
# - Start DRBD resources if the VMs are running already on the peer.
|
|
#
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Anvil::Tools;
|
|
use NetAddr::IP;
|
|
require POSIX;
|
|
|
|
# Work out our own file name and the directory we were invoked from.
# NOTE: When '$0' has no directory part (for example, 'perl anvil-safe-start' run from the current
#       directory), the patterns below can't match. In that case we fall back to '$0' itself and '.', so
#       that neither value is left undefined. '$THIS_FILE' is used as the 'source' of every log call.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if ((not defined $THIS_FILE) or ($THIS_FILE eq ""))
{
	$THIS_FILE = $0;
}
# The file name is quoted with \Q..\E so that any regex metacharacters in it are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
# A relative directory is made absolute using the shell's working directory, when it is known.
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

# Get the Anvil::Tools handle and record that we've started.
my $anvil = Anvil::Tools->new();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
|
|
|
|
# This tool must be run by 'root'.
# NOTE: '$<' is the real UID and '$>' is the effective UID. We carry on if either one of them is '0', and
#       only refuse to run when neither is.
if (not (($< == 0) || ($> == 0)))
{
	# Neither UID is root, tell the user and exit.
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}
|
|
|
|
# Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks
# is to setup the database server.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# Default every switch we understand to an empty string, then let the command line override them.
my @known_switches = ('job-uuid', 'disable', 'enable', 'force', 'local', 'status');
foreach my $switch (@known_switches)
{
	$anvil->data->{switches}{$switch} = "";
}
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	map { ("switches::".$_ => $anvil->data->{switches}{$_}) } @known_switches
}});

# If I have no databases, report it once and then sleep until I do.
if (not $anvil->data->{sys}{database}{connections})
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});
	
	while (not $anvil->data->{sys}{database}{connections})
	{
		sleep 10;
		
		# Refresh and try to connect again.
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		
		# If that got us a connection, the loop condition will end the wait.
		next if $anvil->data->{sys}{database}{connections};
		
		# Still nothing, keep waiting.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, secure => 0, key => "log_0439"});
	}
}
|
|
|
|
### Process
# 1. Check if I am enabled and that no other copies are running.
# 2. Can I ping my peer on all three networks? Loop until true.
#    - Wait here indefinitely
# 3. ...
# 6. Using Start Groups/Delays (and ignoring 'clean' off VMs), boot servers.

# The start-up stages, in the order they must run. Each one is called with the Anvil::Tools handle. Some of
# them exit the program themselves, in which case the stages after them never run.
my @stages = (
	# Check to see if we should run. Also checks/sets enable/disable requests.
	\&prerun_checks,
	# Wait until I can reach the peer on all three networks. This will not return until access is
	# available on all networks. There is no timeout.
	\&wait_for_access,
	# Start pacemaker now.
	\&start_pacemaker,
	# Boot servers.
	\&boot_servers,
	# Start DRBD resources locally for VMs running on the peer already.
	\&check_drbd,
);
foreach my $stage (@stages)
{
	$stage->($anvil);
}

# Every stage returned, so report that we're done and exit cleanly.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});

$anvil->nice_exit({exit_code => 0});
|
|
|
|
#############################################################################################################
|
|
# Functions #
|
|
#############################################################################################################
|
|
|
|
# This brings up, locally, DRBD resources that the peer is waiting to connect on.
#
# The DRBD status is read locally and from the peer (reached at 'sys::peer_target_ip' using
# 'sys::peer_password'). Then, for each connection of each resource the peer reports, the resource is
# brought 'up' locally when the peer's side of the connection is 'Connecting' and our side of the connection
# of the same name is not 'StandAlone'.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns '0' in all cases.
sub check_drbd
{
	my ($anvil) = @_;
	
	# Find the servers running on the peer.
	my $short_host_name      = $anvil->Get->short_host_name();
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	my $peer_ip_address      = $anvil->data->{sys}{peer_target_ip};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
		peer_password        => $anvil->Log->is_secure($peer_password),
		peer_ip_address      => $peer_ip_address,
	}});
	
	# 'sys::peer_target_ip' is only set by wait_for_access() once the peer was reached on a network. If
	# it isn't set, we have no address to read the peer's status from, so there is nothing to do.
	if (not $peer_ip_address)
	{
		return(0);
	}
	
	# Get the list of resources up locally and on the peer.
	$anvil->DRBD->get_status({debug => 2});
	$anvil->DRBD->get_status({
		debug    => 2,
		password => $peer_password,
		target   => $peer_ip_address,
	});
	
	foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}})
	{
		foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}})
		{
			# NOTE: 'peer_is_me' is recorded for debugging only, it doesn't change what we do below.
			my $peer_is_me = $anvil->Network->is_local({host => $peer_name});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				resource   => $resource,
				peer_name  => $peer_name,
				peer_is_me => $peer_is_me,
			}});
			
			# The peer's view of this connection, and our view of the connection with the same name (an
			# empty string if we don't have one).
			my $peer_connection_state  = $anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}{$peer_name}{'connection-state'};
			my $local_connection_state = exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} ? $anvil->data->{drbd}{status}{$short_host_name}{resource}{$resource}{connection}{$peer_name}{'connection-state'} : "";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				peer_connection_state  => $peer_connection_state,
				local_connection_state => $local_connection_state,
			}});
			
			if (($peer_connection_state =~ /Connecting/i) && ($local_connection_state !~ /StandAlone/i))
			{
				# Start the DRBD resource locally.
				my $return_code = $anvil->DRBD->manage_resource({
					debug    => 2,
					resource => $resource,
					task     => "up",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
			}
		}
	}
	
	return(0);
}
|
|
|
|
# This boots the servers by calling 'anvil-boot-server --server all', with the current logging switches
# appended to the call.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns '0' on success. If the call returns a non-zero return code, the error is logged and the program
# exits with exit code '1'.
sub boot_servers
{
	my ($anvil) = @_;
	
	# Tell the user what we're about to do.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0614"});
	
	# Build and run the call.
	my $boot_call = $anvil->data->{path}{exe}{'anvil-boot-server'}." --server all".$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $boot_call }});
	
	my ($boot_output, $boot_return_code) = $anvil->System->call({shell_call => $boot_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $boot_output, 
		return_code => $boot_return_code,
	}});
	
	# A return code of '0' means all is well.
	return(0) if not $boot_return_code;
	
	# What?! Fail out, we're done.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0275", variables => {
		output      => $boot_output, 
		return_code => $boot_return_code,
	}});
	$anvil->nice_exit({exit_code => 1});
	
	return(0);
}
|
|
|
|
# Start pacemaker and wait until we're quorate.
#
# If the CIB can already be parsed, pacemaker is taken to be running and this returns immediately.
# Otherwise, 'pcs cluster start' is called and we then check every five seconds until the CIB parses, the
# quorum data parses, the local node is 'ready' and we are quorate. There is no timeout on this wait.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns '0'. If 'pcs cluster start' returns a non-zero return code, the error is logged and the program
# exits with exit code '1'.
sub start_pacemaker
{
	my ($anvil) = @_;
	
	my $anvil_uuid           = $anvil->data->{sys}{anvil_uuid};
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		anvil_uuid           => $anvil_uuid,
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
	}});
	
	# Is pacemaker already running? If the CIB parses without a problem, it is and we're done.
	my ($initial_problem) = $anvil->Cluster->parse_cib({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $initial_problem }});
	return(0) if not $initial_problem;
	
	# Nope, start it.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"});
	
	### TODO: A lot more testing is needed for degraded single-node start later.
	###       Should we use --all, or wait for our peer? For now, we wait.
	#my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all";
	my $start_call = $anvil->data->{path}{exe}{pcs}." cluster start";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $start_call }});
	
	my ($start_output, $start_return_code) = $anvil->System->call({shell_call => $start_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $start_output, 
		return_code => $start_return_code,
	}});
	if ($start_return_code)
	{
		# What?! Fail out, we're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0256", variables => {
			output      => $start_output, 
			return_code => $start_return_code,
		}});
		$anvil->nice_exit({exit_code => 1});
	}
	
	### TODO: We may implement the logic to fence our peer (similar to cman's 'post_join_delay' logic)
	###       at a later time. For now, we'll wait forever for the cluster to start. This is why we set
	###       'wait_for_peer', even though nothing acts on it yet.
	# The plan is to wait up to two minutes for the cluster to start and, if it's not up by then, fence
	# the peer and, if the fence succeeds, unblock quorum.
	my $start_time    = time;
	my $wait_for_peer = $start_time + 120;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		start_time    => $start_time,
		wait_for_peer => $wait_for_peer, 
	}});
	
	my $waiting = 1;
	while ($waiting)
	{
		# Assume we're up. Any check below that isn't satisfied sets this back to '1'.
		$waiting = 0;
		
		my ($cib_problem) = $anvil->Cluster->parse_cib({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $cib_problem }});
		if ($cib_problem)
		{
			# Can't parse the CIB yet, wait.
			$waiting = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
		else
		{
			# Quorum, as reported in the CIB, sets 'have-quorum' to '1' as soon as it starts, then 
			# retracts it. For this reason, we use 'parse_quorum()' to get the quorum directly from
			# corosync/votequorum.
			my ($quorum_problem) = $anvil->Cluster->parse_quorum({debug => 2});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $quorum_problem }});
			if ($quorum_problem)
			{
				# Corosync is down.
				$waiting = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
			}
			else
			{
				### NOTE: We don't worry about maintenance mode yet, as it shouldn't apply, but we
				###       may change that view later.
				# See where we are.
				my $node_name        = $anvil->data->{cib}{parsed}{'local'}{name};
				my $node_state       = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state};
				my $maintenance_mode = $node_state->{'maintenance-mode'};
				my $in_ccm           = $node_state->{in_ccm};
				my $crmd             = $node_state->{crmd};
				my $join             = $node_state->{'join'};
				my $ready            = $node_state->{ready};
				my $quorate          = $anvil->data->{quorum}{quorate};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					's1:node_name'        => $node_name,
					's2:maintenance_mode' => $maintenance_mode, 
					's3:in_ccm/crmd/join' => $in_ccm."/".$crmd."/".$join,
					's4:ready'            => $ready, 
					's5:quorate'          => $quorate, 
				}});
				
				# Are we online and, if so, do we have quorum?
				if (not $ready)
				{
					# Not ready yet.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0612", variables => { 
						node_name => $node_name,
						in_ccm    => $in_ccm,
						crmd      => $crmd,
						'join'    => $join,
					}});
				}
				elsif (not $quorate)
				{
					# We're ready, but we don't have quorum.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
					
					# Keep waiting, or fence the peer?
					if (time > $wait_for_peer)
					{
						### TODO: See above, not implemented yet. Do we want to do this? If so:
						# Time to fence. Use 'pcs stonith fence <peer>', verify it succeeded, 
						# then do 'pcs quorum unblock --force' to finish startup.
					}
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0610", variables => { node_name => $node_name }});
				}
				else
				{
					# We're ready and quorate, so the wait is over.
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0611", variables => { node_name => $node_name }});
				}
			}
		}
		
		# If anything above wasn't satisfied, say so and check again in a few seconds.
		if ($waiting)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0609"});
			sleep 5;
		}
	}
	
	return(0);
}
|
|
|
|
# Check for which networks we have and verify that we can access our peer on each. This function will not
# return until access works on every BCN, SN and IFN network we share with the peer, and on at least one of
# them. There is no timeout.
#
# Networks are matched by subnet, not by interface name. For each local 'bcn*', 'sn*' or 'ifn*' interface, we
# look for a peer interface with the same subnet mask whose IP is within the local interface's network, and
# then test access to that peer IP.
#
# The peer IP of the last network to pass is stored in 'sys::peer_target_ip'.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns '0'.
sub wait_for_access
{
	my ($anvil) = @_;
	
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		host_uuid            => $host_uuid, 
		short_host_name      => $short_host_name, 
		peer_host_uuid       => $peer_host_uuid, 
		peer_short_host_name => $peer_short_host_name, 
		peer_password        => $anvil->Log->is_secure($peer_password), 
	}});
	
	my $waiting = 1;
	while ($waiting)
	{
		# This will get set back to '1' if access to the peer fails on any network, or if no shared
		# network could be tested at all.
		$waiting = 0;
		my $networks_tested = 0;
		
		# Load IPs (again, to catch changes that might be delaying startup)
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $short_host_name,
			host_uuid => $host_uuid,
		});
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $peer_short_host_name,
			host_uuid => $peer_host_uuid,
		});
		
		# Loop through our interfaces and then loop our peers. Test access over them and set 
		# 'waiting' back to '1' if the connection fails.
		foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				interface => $interface,
				waiting   => $waiting, 
			}});
			
			# Once one network has failed, we're going to sleep and retry everything anyway, so don't
			# bother testing the rest on this pass.
			next if $waiting;
			
			# Only care about our networks.
			if (($interface !~ /^bcn/) && ($interface !~ /^sn/) && ($interface !~ /^ifn/))
			{
				# Not an interface we care about
				next;
			}
			
			my $this_network = ($interface =~ /^(.*?)_/)[0];
			my $ip_address   = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
			my $subnet_mask  = $anvil->data->{network}{$short_host_name}{interface}{$interface}{subnet_mask};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:this_network' => $this_network,
				's2:ip_address'   => $ip_address,
				's3:subnet_mask'  => $subnet_mask, 
			}});
			
			# Without both an IP and a subnet mask, there's no network to match against the peer.
			next if ((not $ip_address) or (not $subnet_mask));
			
			### NOTE: I know I could match interface names, but that's not certain enough. It's 
			###       possible (if unlikely) that the network name+number differs on our peer. So
			###       this is safer.
			# Loop through my peer's interfaces and see if we're sharing this one.
			# NOTE: NetAddr::IP->new() returns 'undef' when it can't parse what it's given, so we
			#       skip this interface rather than compare the peer's networks against nothing.
			my $local_network = NetAddr::IP->new($ip_address."/".$subnet_mask);
			next if not defined $local_network;
			
			my $peer_match_found = 0;
			foreach my $peer_interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$peer_short_host_name}{interface}})
			{
				last if $peer_match_found;
				my $peer_ip_address  = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{ip};
				my $peer_subnet_mask = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{subnet_mask};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					peer_interface   => $peer_interface,
					peer_ip_address  => $peer_ip_address,
					peer_subnet_mask => $peer_subnet_mask, 
				}});
				
				# A peer interface without an IP or subnet mask can't be a match.
				next if ((not $peer_ip_address) or (not $peer_subnet_mask));
				
				# This the matching network?
				next if $subnet_mask ne $peer_subnet_mask;
				
				# As above, skip the peer's interface if its address can't be parsed, instead of
				# calling a method on 'undef'.
				my $peer_network = NetAddr::IP->new($peer_ip_address."/".$peer_subnet_mask);
				next if not defined $peer_network;
				
				if ($peer_network->within($local_network))
				{
					# Match, test access.
					$peer_match_found = 1;
					$networks_tested++;
					my $access = $anvil->Remote->test_access({
						target   => $peer_ip_address,
						password => $peer_password,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
					if ($access)
					{
						# This network is good.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0604", variables => { 
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address, 
						}});
						
						# Remember a peer IP that we know works, for later calls to the peer.
						$anvil->data->{sys}{peer_target_ip} = $peer_ip_address;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							"sys::peer_target_ip" => $anvil->data->{sys}{peer_target_ip},
						}});
					}
					else
					{
						# No access, wait and try it again.
						$waiting = 1;
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0605", variables => { 
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address, 
						}});
					}
				}
			}
		}
		
		# If we didn't find a single network shared with the peer (for example, the IPs haven't been
		# recorded yet), then nothing was verified. Keep waiting instead of reporting that all
		# networks are up.
		if (not $networks_tested)
		{
			$waiting = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				networks_tested => $networks_tested,
				waiting         => $waiting, 
			}});
		}
		
		if ($waiting)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0606", variables => { peer => $peer_short_host_name }});
			sleep 5;
		}
	}
	
	# All networks are up.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0607", variables => { peer => $peer_short_host_name }});
	
	return(0);
}
|
|
|
|
# This checks to verify that we're a node in an Anvil!, and if so, if this tool is enabled. It also handles
# the '--enable', '--disable' and '--status' requests.
#
# This exits the program (exit code '0') when;
# - This host is not a node, or is not yet in an Anvil!.
# - '--enable' or '--disable' was used (after recording the setting for this node and, unless '--local' was
#   also used, for the peer).
# - '--status' was used (after reporting the state of both nodes).
# - Another instance of this program is running.
# - This tool is disabled for this node and '--force' was not used.
#
# On the way through, 'sys::anvil_uuid', 'sys::peer_host_uuid' and 'sys::peer_password' are set.
#
# Parameters;
# - $anvil - The Anvil::Tools handle.
#
# Returns '0' when the caller should carry on with the start-up.
sub prerun_checks
{
	my ($anvil) = @_;
	
	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();
	
	# Only nodes run this tool.
	my $host_uuid = $anvil->Get->host_uuid();
	my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		host_uuid => $host_uuid, 
		host_type => $host_type, 
	}});
	if ($host_type ne "node")
	{
		# We're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# The node needs to be a member of an Anvil!.
	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	if (not $anvil_uuid)
	{
		# This is a node, but not in an Anvil! yet.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Whichever of the two nodes isn't us is our peer.
	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		node1_host_uuid => $node1_host_uuid, 
		node2_host_uuid => $node2_host_uuid, 
	}});
	
	my $peer_host_uuid = $node1_host_uuid;
	if ($host_uuid eq $node1_host_uuid)
	{
		$peer_host_uuid = $node2_host_uuid;
	}
	$anvil->data->{sys}{anvil_uuid}     = $anvil_uuid;
	$anvil->data->{sys}{peer_host_uuid} = $peer_host_uuid;
	$anvil->data->{sys}{peer_password}  = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"sys::anvil_uuid"     => $anvil->data->{sys}{anvil_uuid}, 
		"sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid}, 
		"sys::peer_password"  => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}), 
	}});
	
	# Are we being asked to enable or disable? If so, work out the new value and which message to show.
	# '--enable' wins if both switches were used.
	my $local_only = $anvil->data->{switches}{'local'};
	my $set_to     = 1;
	my $message    = "";
	if ($anvil->data->{switches}{enable})
	{
		$message = $local_only ? "log_0599" : "log_0600";
	}
	elsif ($anvil->data->{switches}{disable})
	{
		$set_to  = 0;
		$message = $local_only ? "log_0601" : "log_0602";
	}
	
	# If we're updating the settings, do so and then exit.
	if ($message)
	{
		# We always update ourselves, and our peer as well unless we were told to only act locally.
		my @target_host_uuids = ($host_uuid);
		if (not $local_only)
		{
			push @target_host_uuids, $anvil->data->{sys}{peer_host_uuid};
		}
		foreach my $target_host_uuid (@target_host_uuids)
		{
			my ($variable_uuid) = $anvil->Database->insert_or_update_variables({
				debug                 => 3,
				variable_name         => "tool::anvil-safe-start::enabled", 
				variable_value        => $set_to, 
				variable_default      => 1, 
				variable_description  => "striker_0286", 
				variable_section      => "system", 
				variable_source_uuid  => $target_host_uuid, 
				variable_source_table => "hosts", 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
		}
		
		# Record that it's been enabled or disabled.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Read my variables.
	my ($local_enabled, $local_variable_uuid) = $anvil->Database->read_variable({
		debug                 => 3,
		variable_name         => "tool::anvil-safe-start::enabled", 
		variable_source_table => "hosts", 
		variable_source_uuid  => $host_uuid,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		local_enabled => $local_enabled, 
		variable_uuid => $local_variable_uuid, 
	}});
	# No UUID means the value hasn't been recorded, so we default to 1.
	if (not $local_variable_uuid)
	{
		$local_enabled = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }});
	}
	
	# Have we just been asked for the status?
	if ($anvil->data->{switches}{status})
	{
		# Yes, check our peer as well.
		my ($peer_enabled, $peer_variable_uuid) = $anvil->Database->read_variable({
			debug                 => 3,
			variable_name         => "tool::anvil-safe-start::enabled", 
			variable_source_table => "hosts", 
			variable_source_uuid  => $anvil->data->{sys}{peer_host_uuid},
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			peer_enabled  => $peer_enabled, 
			variable_uuid => $peer_variable_uuid, 
		}});
		# No UUID means the value hasn't been recorded, so we default to 1.
		if (not $peer_variable_uuid)
		{
			$peer_enabled = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }});
		}
		
		# What we tell the user slightly depends on which nodes are enabled.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			local_enabled => $local_enabled, 
			peer_enabled  => $peer_enabled, 
		}});
		
		# The lookup key is '<local>:<peer>', with '1' for enabled and '0' for disabled.
		my %status_message_for = (
			"1:1" => "message_0227",	# Both nodes are enabled.
			"0:0" => "message_0228",	# Both nodes are disabled.
			"1:0" => "message_0229",	# We're enabled, the peer is disabled.
			"0:1" => "message_0230",	# We're disabled, the peer is enabled.
		);
		my $enabled_state  = ($local_enabled ? 1 : 0).":".($peer_enabled ? 1 : 0);
		my $status_message = $status_message_for{$enabled_state};
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $status_message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Is another instance running?
	my $pids = $anvil->System->pids({
		debug        => 3,
		ignore_me    => 1, 
		program_name => $THIS_FILE,
	});
	my $other_instances = scalar(@{$pids});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});
	if ($other_instances)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Last test, enabled or forced?
	if ($local_enabled)
	{
		# Enabled, carry on.
		return(0);
	}
	if ($anvil->data->{switches}{force})
	{
		# Disabled, but forced, so run anyway.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"});
		return(0);
	}
	
	# Disabled and not forced. Exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"});
	$anvil->nice_exit({exit_code => 0});
	
	return(0);
}
|