* Started work on anvil-safe-start. Whether the tool is enabled, and therefore whether it runs automatically, is controlled by a variable stored in the database. The tool itself can be used to enable or disable anvil-safe-start on the local node and on the peer node. ScanCore will start it, but only if ScanCore itself starts within 10 minutes of the node booting. It can always be run manually.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent 75343aadff
commit 15e71768a1
  1. 13
      share/words.xml
  2. 1
      tools/Makefile.am
  3. 258
      tools/anvil-safe-start
  4. 9
      tools/scancore

@ -1440,6 +1440,12 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0595">Updated the lvm.conf file to add the filter: [#!variable!filter!#] to prevent LVM from seeing the DRBD devices as LVM devices.</key>
<key name="log_0596">The host: [#!variable!host_name!#] last updated the database: [#!variable!difference!#] seconds ago, skipping power checks.</key>
<key name="log_0597">The host: [#!variable!host_name!#] has no entries in the 'updated' table, so ScanCore has likely never run. Skipping this host for now.</key>
<key name="log_0598">This host is not a node, this program isn't designed to run here.</key>
<key name="log_0599">Enabled 'anvil-safe-start' locally on this node.</key>
<key name="log_0600">Enabled 'anvil-safe-start' on both nodes in this Anvil! system.</key>
<key name="log_0601">Disabled 'anvil-safe-start' locally on this node.</key>
<key name="log_0602">Disabled 'anvil-safe-start' on both nodes in this Anvil! system.</key>
<key name="log_0603">This node is not in an Anvil! yet, so there's no reason to run this program.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -1762,6 +1768,12 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty
<key name="message_0224">The node: [#!variable!host_name!#] is coming online; the cluster resource manager is running. (step 2/3)</key>
<key name="message_0225">The node: [#!variable!host_name!#] is coming online; the node is a consensus cluster member. (step 1/3)</key>
<key name="message_0226">The node: [#!variable!host_name!#] has booted, but it is not (yet) joining the cluster.</key>
<key name="message_0227">The 'anvil-safe-start' tool is enabled on both this node and on the peer.</key>
<key name="message_0228">The 'anvil-safe-start' tool is disabled on both this node and on the peer.</key>
<key name="message_0229">The 'anvil-safe-start' tool is enabled on this node and disabled on the peer.</key>
<key name="message_0230">The 'anvil-safe-start' tool is disabled on this node and enabled on the peer.</key>
<key name="message_0231">The 'anvil-safe-start' tool is disabled, exiting. Use '--force' to run anyway.</key>
<key name="message_0232">The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding.</key>
<!-- Success messages shown to the user -->
<key name="ok_0001">Saved the mail server information successfully!</key>
@ -2081,6 +2093,7 @@ If you are comfortable that the target has changed for a known reason, you can s
<key name="striker_0283">There are no #!string!brand_0006!# configured yet. Existing files will automatically sync to new clusters.</key>
<key name="striker_0284">Cancel</key>
<key name="striker_0285">Close</key>
<key name="striker_0286">This controls if 'anvil-safe-start' is enabled on a node.</key>
<!-- These are generally units and appended to numbers -->
<key name="suffix_0001">#!variable!number!#/sec</key>

@ -22,6 +22,7 @@ dist_sbin_SCRIPTS = \
anvil-migrate-server \
anvil-parse-fence-agents \
anvil-provision-server \
anvil-safe-start \
anvil-scan-network \
anvil-sync-shared \
anvil-update-issue \

@ -0,0 +1,258 @@
#!/usr/bin/perl
#
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers.
#
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes
# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool
# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' ->
# 'host_uuid' table.
#
# Exit codes;
# 0 = Normal exit.
# 1 = Any problem that causes an early exit.
#
# TODO:
# - Make this work on DR hosts.
#
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
# Work out the program name and the directory it was called from. If '$0' has no directory component (ie:
# when called as 'perl anvil-safe-start'), fall back to '$0' itself and to '.' so that neither value is
# left undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if ((not defined $THIS_FILE) or ($THIS_FILE eq ""))
{
	$THIS_FILE = $0;
}
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	if ($running_directory =~ /^\.(\/|$)/)
	{
		# './x' becomes '<pwd>/x'.
		$running_directory =~ s/^\./$ENV{PWD}/;
	}
	else
	{
		# Any other relative path ('../x', '.hidden/x') is anchored under the current directory.
		$running_directory = $ENV{PWD}."/".$running_directory;
	}
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();

# Set the default (empty) value for each switch this tool reads, then read the switches once.
$anvil->data->{switches}{disable} = "";
$anvil->data->{switches}{enable}  = "";
$anvil->data->{switches}{force}   = "";
$anvil->data->{switches}{'local'} = "";
$anvil->data->{switches}{status}  = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Connect to the database(s). The checks below read from and write to the database, so if there are no
# connections yet, we wait (below) until there is at least one.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections})
{
	# Tell the user once, then retry the connection every ten seconds until we have one.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});
	until($anvil->data->{sys}{database}{connections})
	{
		sleep 10;
		
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Keep waiting
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"});
		}
	}
}

# Check to see if we should run. Also checks/sets enable/disable requests. This exits the program itself
# if we shouldn't proceed.
prerun_checks($anvil);

$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# This verifies that this host is a node and that it is a member of an Anvil!, handles '--enable',
# '--disable' and '--status' requests, and finally checks whether this tool is enabled on this host. The
# enabled state is stored in the variable 'tool::anvil-safe-start::enabled', linked to the host's UUID in
# the 'hosts' table. If no value has been recorded for a host, the tool is treated as enabled.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# Returns '0' when the caller should proceed; that is, when the tool is enabled, or when it is disabled
# but '--force' was used. In all other cases (not a node, not in an Anvil!, an enable/disable/status
# request was handled, or the tool is disabled), this calls 'nice_exit' with exit code '0' itself.
sub prerun_checks
{
	my ($anvil) = @_;
	
	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();
	
	# If this host has no record in 'hosts' yet, the type will be undefined. Treat that as "not a node"
	# without tripping 'uninitialized' warnings.
	my $host_uuid = $anvil->Get->host_uuid();
	my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	   $host_type = "" if not defined $host_type;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		host_uuid => $host_uuid,
		host_type => $host_type, 
	}});
	
	if ($host_type ne "node")
	{
		# We're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	if (not $anvil_uuid)
	{
		# This is a node, but not in an Anvil! yet.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# The peer is whichever of the two nodes in this Anvil! isn't us.
	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	my $peer_host_uuid  = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
	
	# Are we being asked to enable or disable? If both are given, '--enable' wins.
	my $set_to  = 1;
	my $message = "";
	if ($anvil->data->{switches}{enable})
	{
		# We're enabling, which message will we use?
		$message = $anvil->data->{switches}{'local'} ? "log_0599" : "log_0600";
	}
	elsif ($anvil->data->{switches}{disable})
	{
		# We're disabling. Which message?
		$set_to  = 0;
		$message = $anvil->data->{switches}{'local'} ? "log_0601" : "log_0602";
	}
	
	# If we're updating the settings, do so and then exit.
	if ($message)
	{
		# We always update ourselves. Unless '--local' was used, we update our peer as well.
		my $target_host_uuids = [$host_uuid];
		if (not $anvil->data->{switches}{'local'})
		{
			push @{$target_host_uuids}, $peer_host_uuid;
		}
		foreach my $target_host_uuid (@{$target_host_uuids})
		{
			my ($variable_uuid) = $anvil->Database->insert_or_update_variables({
				debug                 => 2, 
				variable_name         => "tool::anvil-safe-start::enabled", 
				variable_value        => $set_to, 
				variable_default      => 1, 
				variable_description  => "striker_0286", 
				variable_section      => "system", 
				variable_source_uuid  => $target_host_uuid, 
				variable_source_table => "hosts", 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
		}
		
		# Report what was changed (enabled or disabled, locally or on both nodes).
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Read my variables. Only the value and the variable's UUID are needed here.
	my ($local_enabled, $local_variable_uuid) = $anvil->Database->read_variable({
		debug                 => 2, 
		variable_name         => "tool::anvil-safe-start::enabled", 
		variable_source_table => "hosts", 
		variable_source_uuid  => $host_uuid, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		local_enabled => $local_enabled,
		variable_uuid => $local_variable_uuid, 
	}});
	
	# No UUID means the value hasn't been recorded, so we default to 1.
	if (not $local_variable_uuid)
	{
		$local_enabled = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }});
	}
	
	# Have we just been asked for the status?
	if ($anvil->data->{switches}{status})
	{
		# Yes, check our peer as well.
		my ($peer_enabled, $peer_variable_uuid) = $anvil->Database->read_variable({
			variable_name         => "tool::anvil-safe-start::enabled", 
			variable_source_table => "hosts", 
			variable_source_uuid  => $peer_host_uuid, 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			peer_enabled  => $peer_enabled,
			variable_uuid => $peer_variable_uuid, 
		}});
		
		# No UUID means the value hasn't been recorded, so we default to 1.
		if (not $peer_variable_uuid)
		{
			$peer_enabled = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }});
		}
		
		# What we tell the user depends on which nodes are enabled.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			local_enabled => $local_enabled,
			peer_enabled  => $peer_enabled, 
		}});
		my $status_message = "";
		if (($local_enabled) && ($peer_enabled))
		{
			# Both nodes are enabled.
			$status_message = "message_0227";
		}
		elsif ((not $local_enabled) && (not $peer_enabled))
		{
			# Both nodes are disabled
			$status_message = "message_0228";
		}
		elsif ($local_enabled)
		{
			# We're enabled, the peer is disabled.
			$status_message = "message_0229";
		}
		else
		{
			# We're disabled, the peer is enabled.
			$status_message = "message_0230";
		}
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $status_message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	if (not $local_enabled)
	{
		# Disabled. Forced?
		if ($anvil->data->{switches}{force})
		{
			# Forced, run anyway.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"});
		}
		else
		{
			# Exit.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"});
			$anvil->nice_exit({exit_code => 0});
		}
	}
	
	return(0);
}

@ -332,6 +332,15 @@ sub startup_tasks
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
# If we've been up for less than ten minutes, call anvil-safe-start as a background process. It will
# exit if it is disabled.
my $uptime = $anvil->Get->uptime;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
if ($uptime < 600)
{
}
return(0);
}

Loading…
Cancel
Save