Merge pull request #71 from ClusterLabs/anvil-safe-start-work

* Started work on anvil-safe-start. The enable/disable logic and how …
main
digimer-bot 4 years ago committed by GitHub
commit c745a13991
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 13
      share/words.xml
  2. 1
      tools/Makefile.am
  3. 258
      tools/anvil-safe-start
  4. 9
      tools/scancore

@ -1440,6 +1440,12 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0595">Updated the lvm.conf file to add the filter: [#!variable!filter!#] to prevent LVM from seeing the DRBD devices as LVM devices.</key>
<key name="log_0596">The host: [#!variable!host_name!#] last updated the database: [#!variable!difference!#] seconds ago, skipping power checks.</key>
<key name="log_0597">The host: [#!variable!host_name!#] has no entries in the 'updated' table, so ScanCore has likely never run. Skipping this host for now.</key>
<key name="log_0598">This host is not a node, this program isn't designed to run here.</key>
<key name="log_0599">Enabled 'anvil-safe-start' locally on this node.</key>
<key name="log_0600">Enabled 'anvil-safe-start' on both nodes in this Anvil! system.</key>
<key name="log_0601">Disabled 'anvil-safe-start' locally on this node.</key>
<key name="log_0602">Disabled 'anvil-safe-start' on both nodes in this Anvil! system.</key>
<key name="log_0603">This node is not in an Anvil! yet, so there's no reason to run this program.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
@ -1762,6 +1768,12 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty
<key name="message_0224">The node: [#!variable!host_name!#] is coming online; the cluster resource manager is running. (step 2/3)</key>
<key name="message_0225">The node: [#!variable!host_name!#] is coming online; the node is a consensus cluster member. (step 1/3)</key>
<key name="message_0226">The node: [#!variable!host_name!#] has booted, but it is not (yet) joining the cluster.</key>
<key name="message_0227">The 'anvil-safe-start' tool is enabled on both this node and on the peer.</key>
<key name="message_0228">The 'anvil-safe-start' tool is disabled on both this node and on the peer.</key>
<key name="message_0229">The 'anvil-safe-start' tool is enabled on this node and disabled on the peer.</key>
<key name="message_0230">The 'anvil-safe-start' tool is disabled on this node and enabled on the peer.</key>
<key name="message_0231">The 'anvil-safe-start' tool is disabled, exiting. Use '--force' to run anyway.</key>
<key name="message_0232">The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding.</key>
<!-- Success messages shown to the user -->
<key name="ok_0001">Saved the mail server information successfully!</key>
@ -2081,6 +2093,7 @@ If you are comfortable that the target has changed for a known reason, you can s
<key name="striker_0283">There are no #!string!brand_0006!# configured yet. Existing files will automatically sync to new clusters.</key>
<key name="striker_0284">Cancel</key>
<key name="striker_0285">Close</key>
<key name="striker_0286">This controls if 'anvil-safe-start' is enabled on a node.</key>
<!-- These are generally units and appended to numbers -->
<key name="suffix_0001">#!variable!number!#/sec</key>

@ -22,6 +22,7 @@ dist_sbin_SCRIPTS = \
anvil-migrate-server \
anvil-parse-fence-agents \
anvil-provision-server \
anvil-safe-start \
anvil-scan-network \
anvil-sync-shared \
anvil-update-issue \

@ -0,0 +1,258 @@
#!/usr/bin/perl
#
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers.
#
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes
# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool
# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' ->
# 'host_uuid' table.
#
# Exit codes;
# 0 = Normal exit.
# 1 = Any problem that causes an early exit.
#
# TODO:
# - Make this work on DR hosts.
#
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
# Work out our own file name and the directory we were called from. When '$0' has no directory component
# (for example, when started as 'perl anvil-safe-start'), fall back to the bare program name and to '.' so
# that neither value is left undefined (which would otherwise cause 'uninitialized' warnings below and an
# undefined 'source' in every log call).
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE = $0 if not defined $THIS_FILE;
# The file name is quoted with \Q..\E so that it is matched literally, not as a pattern.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Called via a relative path; swap the leading '.' for the current working directory.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off output buffering so that anything we print is shown immediately.
$| = 1;

my $anvil = Anvil::Tools->new();

# NOTE(review): The switches are parsed again further down, after their defaults are set, and 'log_0115' is
#               logged a second time there. This early call looks redundant unless the library needs it
#               before the database connection (e.g. for logging related switches) - confirm.
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
# We only give up when neither the real nor the effective UID is 0.
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Connect to the database(s). Whether a connection was actually made is checked below, once the switches
# have been read.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# Set the default (empty) value of every switch this program understands, then read the command line.
$anvil->data->{switches}{disable} = "";
$anvil->data->{switches}{enable}  = "";
$anvil->data->{switches}{force}   = "";
$anvil->data->{switches}{'local'} = "";
$anvil->data->{switches}{status}  = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections})
{
	# Tell the user once that no database is available, then retry the connection every ten seconds
	# until at least one is.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});
	until($anvil->data->{sys}{database}{connections})
	{
		sleep 10;
		
		# NOTE(review): 'refresh' presumably re-reads the configuration before we reconnect - confirm.
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Keep waiting
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"});
		}
	}
}

# Check to see if we should run. Also checks/sets enable/disable requests. This exits on its own when there
# is nothing (more) for us to do.
prerun_checks($anvil);

$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################
# This checks to verify that we're a node in an Anvil!, handles '--enable', '--disable' and '--status'
# requests, and then checks whether this tool is enabled on this host.
#
# Parameters;
#   anvil - The Anvil::Tools handle.
#
# Returns '0' when the caller should carry on. In all of the following cases, 'nice_exit' is called with
# exit code '0' instead;
# - This host's type is not 'node'.
# - This node is not (yet) a member of an Anvil!.
# - '--enable' or '--disable' was used (the variable is saved first; '--local' restricts the change to
#   this node, otherwise the peer is updated as well). If both are given, '--enable' wins.
# - '--status' was used (the state of this node and the peer is reported).
# - The tool is disabled on this node and '--force' was not used.
#
# When no 'tool::anvil-safe-start::enabled' variable has been recorded for a host, that host is treated as
# enabled.
sub prerun_checks
{
	my ($anvil) = @_;
	
	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();
	
	my $host_uuid = $anvil->Get->host_uuid();
	my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	
	# If this host isn't in the loaded hosts data, the type is undefined. Use an empty string so that the
	# comparison below treats it as "not a node" without an 'uninitialized' warning.
	$host_type = "" if not defined $host_type;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		host_uuid => $host_uuid,
		host_type => $host_type, 
	}});
	
	if ($host_type ne "node")
	{
		# We're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	if (not $anvil_uuid)
	{
		# This is a node, but not in an Anvil! yet.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Whichever of the two nodes isn't us is our peer.
	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	my $peer_host_uuid  = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
	
	# Are we being asked to enable or disable? The message key doubles as the "we're changing the
	# setting" flag, and which key is used depends on whether only this node is being changed.
	my $set_to  = 1;
	my $message = "";
	if ($anvil->data->{switches}{enable})
	{
		# We're enabling, which message will we use?
		$message = $anvil->data->{switches}{'local'} ? "log_0599" : "log_0600";
	}
	elsif ($anvil->data->{switches}{disable})
	{
		# We're disabling. Which message?
		$set_to  = 0;
		$message = $anvil->data->{switches}{'local'} ? "log_0601" : "log_0602";
	}
	
	# If we're updating the settings, do so and then exit.
	if ($message)
	{
		# Always update ourselves, and our peer as well unless '--local' was used.
		my @target_host_uuids = ($host_uuid);
		if (not $anvil->data->{switches}{'local'})
		{
			push @target_host_uuids, $peer_host_uuid;
		}
		
		foreach my $target_host_uuid (@target_host_uuids)
		{
			my ($saved_variable_uuid) = $anvil->Database->insert_or_update_variables({
				debug                 => 2, 
				variable_name         => "tool::anvil-safe-start::enabled", 
				variable_value        => $set_to, 
				variable_default      => 1, 
				variable_description  => "striker_0286", 
				variable_section      => "system", 
				variable_source_uuid  => $target_host_uuid, 
				variable_source_table => "hosts", 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $saved_variable_uuid }});
		}
		
		# Record that the setting has been changed (enabled or disabled).
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Read my variables. Only the value and the variable's UUID are used here; any further values that
	# are returned are not needed.
	my ($local_enabled, $local_variable_uuid) = $anvil->Database->read_variable({
		debug                 => 2, 
		variable_name         => "tool::anvil-safe-start::enabled", 
		variable_source_table => "hosts", 
		variable_source_uuid  => $host_uuid, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		local_enabled => $local_enabled,
		variable_uuid => $local_variable_uuid, 
	}});
	
	# No UUID means the value hasn't been recorded, so we default to 1.
	if (not $local_variable_uuid)
	{
		$local_enabled = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }});
	}
	
	# Have we just been asked for the status?
	if ($anvil->data->{switches}{status})
	{
		# Yes, check our peer as well.
		my ($peer_enabled, $peer_variable_uuid) = $anvil->Database->read_variable({
			variable_name         => "tool::anvil-safe-start::enabled", 
			variable_source_table => "hosts", 
			variable_source_uuid  => $peer_host_uuid, 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			peer_enabled  => $peer_enabled,
			variable_uuid => $peer_variable_uuid, 
		}});
		
		# No UUID means the value hasn't been recorded, so we default to 1.
		if (not $peer_variable_uuid)
		{
			$peer_enabled = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }});
		}
		
		# What we tell the user depends on which nodes are enabled.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			local_enabled => $local_enabled,
			peer_enabled  => $peer_enabled, 
		}});
		my $status_message = "";
		if (($local_enabled) && ($peer_enabled))
		{
			# Both nodes are enabled.
			$status_message = "message_0227";
		}
		elsif ((not $local_enabled) && (not $peer_enabled))
		{
			# Both nodes are disabled
			$status_message = "message_0228";
		}
		elsif ($local_enabled)
		{
			# We're enabled, the peer is disabled.
			$status_message = "message_0229";
		}
		else
		{
			# We're disabled, the peer is enabled.
			$status_message = "message_0230";
		}
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $status_message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	if (not $local_enabled)
	{
		# Disabled. Forced?
		if ($anvil->data->{switches}{force})
		{
			# Forced, run anyway.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"});
		}
		else
		{
			# Exit.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"});
			$anvil->nice_exit({exit_code => 0});
		}
	}
	
	return(0);
}

@ -332,6 +332,15 @@ sub startup_tasks
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
# If we've been up for less than ten minutes, call anvil-safe-start as a background process. It will
# exit if it is disabled.
my $uptime = $anvil->Get->uptime;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }});
if ($uptime < 600)
{
}
return(0);
}

Loading…
Cancel
Save