From 15e71768a13c0120a23d25c1244ca1ec9e422c04 Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 12 Apr 2021 00:28:24 -0400 Subject: [PATCH] * Started work on anvil-safe-start. The enable/disable logic and how it runs automatically is controlled by the database and the tool can be used to control anvil-safe-start on both the local and peer node. It will be started by ScanCore, if scancore starts within 10 minutes of the node booting. It will always be able to run manually. Signed-off-by: Digimer --- share/words.xml | 13 +++ tools/Makefile.am | 1 + tools/anvil-safe-start | 258 +++++++++++++++++++++++++++++++++++++++++ tools/scancore | 9 ++ 4 files changed, 281 insertions(+) create mode 100755 tools/anvil-safe-start diff --git a/share/words.xml b/share/words.xml index c83f24eb..98608447 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1440,6 +1440,12 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Updated the lvm.conf file to add the filter: [#!variable!filter!#] to prevent LVM from seeing the DRBD devices as LVM devices. The host: [#!variable!host_name!#] last updated the database: [#!variable!difference!#] seconds ago, skipping power checks. The host: [#!variable!host_name!#] has no entries in the 'updated' table, so ScanCore has likely never run. Skipping this host for now. + This host is not a node, this program isn't designed to run here. + Enabled 'anvil-safe-start' locally on this node. + Enabled 'anvil-safe-start' on both nodes in this Anvil! system. + Disabled 'anvil-safe-start' locally on this node. + Disabled 'anvil-safe-start' on both nodes in this Anvil! system. + This node is not in an Anvil! yet, so there's no reason to run this program. The host name: [#!variable!target!#] does not resolve to an IP address. @@ -1762,6 +1768,12 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty The node: [#!variable!host_name!#] is coming online; the cluster resource manager is running. 
(step 2/3) The node: [#!variable!host_name!#] is coming online; the node is a consensus cluster member. (step 1/3) The node: [#!variable!host_name!#] has booted, but it is not (yet) joining the cluster. + The 'anvil-safe-start' tool is enabled on both this node and on the peer. + The 'anvil-safe-start' tool is disabled on both this node and on the peer. + The 'anvil-safe-start' tool is enabled on this node and disabled on the peer. + The 'anvil-safe-start' tool is disabled on this node and enabled on the peer. + The 'anvil-safe-start' tool is disabled, exiting. Use '--force' to run anyway. + The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding. Saved the mail server information successfully! @@ -2081,6 +2093,7 @@ If you are comfortable that the target has changed for a known reason, you can s There are no #!string!brand_0006!# configured yet. Existing files will automatically sync to new clusters. Cancel Close + This controls if 'anvil-safe-start' is enabled on a node. #!variable!number!#/sec diff --git a/tools/Makefile.am b/tools/Makefile.am index 0f5d3948..1a192b74 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -22,6 +22,7 @@ dist_sbin_SCRIPTS = \ anvil-migrate-server \ anvil-parse-fence-agents \ anvil-provision-server \ + anvil-safe-start \ anvil-scan-network \ anvil-sync-shared \ anvil-update-issue \ diff --git a/tools/anvil-safe-start b/tools/anvil-safe-start new file mode 100755 index 00000000..77ad7f64 --- /dev/null +++ b/tools/anvil-safe-start @@ -0,0 +1,258 @@ +#!/usr/bin/perl +# +# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers. +# +# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes +# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool +# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' -> +# 'host_uuid' table. 
+# +# Exit codes; +# 0 = Normal exit. +# 1 = Any problem that causes an early exit. +# +# TODO: +# - Make this work on DR hosts. +# + +use strict; +use warnings; +use Anvil::Tools; +require POSIX; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. +$| = 1; + +my $anvil = Anvil::Tools->new(); +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); + +# Make sure we're running as 'root' +# $< == real UID, $> == effective UID +if (($< != 0) && ($> != 0)) +{ + # Not root + print $anvil->Words->string({key => "error_0005"})."\n"; + $anvil->nice_exit({exit_code => 1}); +} + +# Connect to the database(s). If we have no connections yet, we don't exit; the loop further down waits +# until at least one database is available. +$anvil->Database->connect(); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); + +$anvil->data->{switches}{disable} = ""; +$anvil->data->{switches}{enable} = ""; +$anvil->data->{switches}{force} = ""; +$anvil->data->{switches}{'local'} = ""; +$anvil->data->{switches}{status} = ""; +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); + + +# If I have no databases, sleep until I do +if (not $anvil->data->{sys}{database}{connections}) +{ + # No database is available yet. Log and print the error, then retry the connection every ten seconds + # until we have one. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"}); + + until($anvil->data->{sys}{database}{connections}) + { + sleep 10; + + $anvil->refresh(); + $anvil->Database->connect(); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); + if (not $anvil->data->{sys}{database}{connections}) + { + # Keep waiting + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"}); + } + } +} + +# Check to see if we should run. Also checks/sets enable/disable requests. +prerun_checks($anvil); + +$anvil->nice_exit({exit_code => 0}); + +############################################################################################################# +# Functions # +############################################################################################################# + +# This checks that we're a node in an Anvil!, processes 'enable', 'disable' and 'status' switch requests, and +# checks if this tool is enabled. It returns 0 only when the tool should run; in every other case it exits. +sub prerun_checks +{ + my ($anvil) = @_; + + $anvil->Database->get_hosts(); + $anvil->Database->get_anvils(); + + my $host_uuid = $anvil->Get->host_uuid(); + my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + host_uuid => $host_uuid, + host_type => $host_type, + }}); + + if ($host_type ne "node") + { + # We're done. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"}); + $anvil->nice_exit({exit_code => 0}); + } + + my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }}); + + if (not $anvil_uuid) + { + # This is a node, but not in an Anvil! yet. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"}); + $anvil->nice_exit({exit_code => 0}); + } + + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $peer_host_uuid = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; + + # Are we being asked to enable or disable? + my $nodes = [$host_uuid]; + my $set_to = 1; + my $message = ""; + if ($anvil->data->{switches}{enable}) + { + # We're enabling, which message will we use? + $message = $anvil->data->{switches}{'local'} ? "log_0599" : "log_0600"; + } + elsif ($anvil->data->{switches}{disable}) + { + # We're disabling. Which message? + $set_to = 0; + $message = $anvil->data->{switches}{'local'} ? "log_0601" : "log_0602"; + } + + # If we're updating the settings, do so and then exit. + if ($message) + { + if (not $anvil->data->{switches}{'local'}) + { + # Add our peer as well. + push @{$nodes}, $peer_host_uuid; + } + foreach my $host_uuid (@{$nodes}) + { + my ($variable_uuid) = $anvil->Database->insert_or_update_variables({ + debug => 2, + variable_name => "tool::anvil-safe-start::enabled", + variable_value => $set_to, + variable_default => 1, + variable_description => "striker_0286", + variable_section => "system", + variable_source_uuid => $host_uuid, + variable_source_table => "hosts", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); + } + + # Record that it's been enabled or disabled. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message}); + $anvil->nice_exit({exit_code => 0}); + } + + # Read my variables. 
+ my ($local_enabled, $variable_uuid, $mtime, $modified_date) = $anvil->Database->read_variable({ + debug => 2, + variable_name => "tool::anvil-safe-start::enabled", + variable_source_table => "hosts", + variable_source_uuid => $host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_enabled => $local_enabled, + variable_uuid => $variable_uuid, + }}); + # No UUID means the value hasn't been recorded, so we default to 1. + if (not $variable_uuid) + { + $local_enabled = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }}); + } + + # Have we just been asked for the status? + if ($anvil->data->{switches}{status}) + { + # Yes, check our peer as well. + my ($peer_enabled, $variable_uuid, $mtime, $modified_date) = $anvil->Database->read_variable({ + variable_name => "tool::anvil-safe-start::enabled", + variable_source_table => "hosts", + variable_source_uuid => $peer_host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peer_enabled => $peer_enabled, + variable_uuid => $variable_uuid, + }}); + # No UUID means the value hasn't been recorded, so we default to 1. + if (not $variable_uuid) + { + $peer_enabled = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }}); + } + + # What we tell the user depends on which nodes are enabled. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_enabled => $local_enabled, + peer_enabled => $peer_enabled, + }}); + + my $message = ""; + if (($local_enabled) && ($peer_enabled)) + { + # Both nodes are enabled. + $message = "message_0227"; + } + elsif ((not $local_enabled) && (not $peer_enabled)) + { + # Both nodes are disabled + $message = "message_0228"; + } + elsif ($local_enabled) + { + # We're enabled, the peer is disabled. 
+ $message = "message_0229"; + } + else + { + # We're disabled, the peer is enabled. + $message = "message_0230"; + } + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message}); + $anvil->nice_exit({exit_code => 0}); + } + + if (not $local_enabled) + { + # Disabled. Forced? + if ($anvil->data->{switches}{force}) + { + # Forced, run anyway. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"}); + } + else + { + # Exit. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"}); + $anvil->nice_exit({exit_code => 0}); + } + } + + return(0); +} diff --git a/tools/scancore b/tools/scancore index 4d0c2505..0662999d 100755 --- a/tools/scancore +++ b/tools/scancore @@ -332,6 +332,15 @@ sub startup_tasks }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); + # If we've been up for less than ten minutes, call anvil-safe-start as a background process. It will + # exit if it is disabled. TODO: The call is not implemented yet; the block below is still empty. + my $uptime = $anvil->Get->uptime; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }}); + if ($uptime < 600) + { + + } + return(0); }