diff --git a/share/words.xml b/share/words.xml index c83f24eb..98608447 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1440,6 +1440,12 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Updated the lvm.conf file to add the filter: [#!variable!filter!#] to prevent LVM from seeing the DRBD devices as LVM devices. The host: [#!variable!host_name!#] last updated the database: [#!variable!difference!#] seconds ago, skipping power checks. The host: [#!variable!host_name!#] has no entries in the 'updated' table, so ScanCore has likely never run. Skipping this host for now. + This host is not a node, this program isn't designed to run here. + Enabled 'anvil-safe-start' locally on this node. + Enabled 'anvil-safe-start' on both nodes in this Anvil! system. + Disabled 'anvil-safe-start' locally on this node. + Disabled 'anvil-safe-start' on both nodes in this Anvil! system. + This node is not in an Anvil! yet, so there's no reason to run this program. The host name: [#!variable!target!#] does not resolve to an IP address. @@ -1762,6 +1768,12 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty The node: [#!variable!host_name!#] is coming online; the cluster resource manager is running. (step 2/3) The node: [#!variable!host_name!#] is coming online; the node is a consensus cluster member. (step 1/3) The node: [#!variable!host_name!#] has booted, but it is not (yet) joining the cluster. + The 'anvil-safe-start' tool is enabled on both this node and on the peer. + The 'anvil-safe-start' tool is disabled on both this node and on the peer. + The 'anvil-safe-start' tool is enabled on this node and disabled on the peer. + The 'anvil-safe-start' tool is disabled on this node and enabled on the peer. + The 'anvil-safe-start' tool is disabled, exiting. Use '--force' to run anyway. + The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding. Saved the mail server information successfully! 
@@ -2081,6 +2093,7 @@ If you are comfortable that the target has changed for a known reason, you can s There are no #!string!brand_0006!# configured yet. Existing files will automatically sync to new clusters. Cancel Close + This controls if 'anvil-safe-start' is enabled on a node. #!variable!number!#/sec diff --git a/tools/Makefile.am b/tools/Makefile.am index 0f5d3948..1a192b74 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -22,6 +22,7 @@ dist_sbin_SCRIPTS = \ anvil-migrate-server \ anvil-parse-fence-agents \ anvil-provision-server \ + anvil-safe-start \ anvil-scan-network \ anvil-sync-shared \ anvil-update-issue \ diff --git a/tools/anvil-safe-start b/tools/anvil-safe-start new file mode 100755 index 00000000..77ad7f64 --- /dev/null +++ b/tools/anvil-safe-start @@ -0,0 +1,258 @@ +#!/usr/bin/perl +# +# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers. +# +# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes +# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool +# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' -> +# 'host_uuid' table. +# +# Exit codes; +# 0 = Normal exit. +# 1 = Any problem that causes an early exit. +# +# TODO: +# - Make this work on DR hosts. +# + +use strict; +use warnings; +use Anvil::Tools; +require POSIX; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. 
+$| = 1; + +my $anvil = Anvil::Tools->new(); +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); + +# Make sure we're running as 'root' +# $< == real UID, $> == effective UID +if (($< != 0) && ($> != 0)) +{ + # Not root + print $anvil->Words->string({key => "error_0005"})."\n"; + $anvil->nice_exit({exit_code => 1}); +} + +# Connect to the database(s). If we have no connections yet, we don't proceed; the block below waits until +# at least one is available. +$anvil->Database->connect(); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); + +$anvil->data->{switches}{disable} = ""; +$anvil->data->{switches}{enable} = ""; +$anvil->data->{switches}{force} = ""; +$anvil->data->{switches}{'local'} = ""; +$anvil->data->{switches}{status} = ""; +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); + + +# If I have no databases, sleep until I do +if (not $anvil->data->{sys}{database}{connections}) +{ + # We need a database connection for the checks below, so report the problem and then retry every + # ten seconds until we have one. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"}); + + until($anvil->data->{sys}{database}{connections}) + { + sleep 10; + + $anvil->refresh(); + $anvil->Database->connect(); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); + if (not $anvil->data->{sys}{database}{connections}) + { + # Keep waiting + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"}); + } + } +} + +# Check to see if we should run. Also checks/sets enable/disable requests. 
+prerun_checks($anvil); + +$anvil->nice_exit({exit_code => 0}); + +############################################################################################################# +# Functions # +############################################################################################################# + +# This checks to verify that we're a node, and if so, if this tool is enabled. If it's disabled or this isn't +# a node, this method will exit. +sub prerun_checks +{ + my ($anvil) = @_; + + $anvil->Database->get_hosts(); + $anvil->Database->get_anvils(); + + my $host_uuid = $anvil->Get->host_uuid(); + my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + host_uuid => $host_uuid, + host_type => $host_type, + }}); + + if ($host_type ne "node") + { + # We're done. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"}); + $anvil->nice_exit({exit_code => 0}); + } + + my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }}); + + if (not $anvil_uuid) + { + # This is a node, but not in an Anvil! yet. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"}); + $anvil->nice_exit({exit_code => 0}); + } + + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $peer_host_uuid = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; + + # Are we being asked to enable or disable? + my $nodes = [$host_uuid]; + my $set_to = 1; + my $message = ""; + if ($anvil->data->{switches}{enable}) + { + # We're enabling, which message will we use? + $message = $anvil->data->{switches}{'local'} ? 
"log_0599" : "log_0600"; + } + elsif ($anvil->data->{switches}{disable}) + { + # We're disabling. Which message? + $set_to = 0; + $message = $anvil->data->{switches}{'local'} ? "log_0601" : "log_0602"; + } + + # If we're updating the settings, do so and then exit. + if ($message) + { + if (not $anvil->data->{switches}{'local'}) + { + # Add our peer as well. + push @{$nodes}, $peer_host_uuid; + } + foreach my $host_uuid (@{$nodes}) + { + my ($variable_uuid) = $anvil->Database->insert_or_update_variables({ + debug => 2, + variable_name => "tool::anvil-safe-start::enabled", + variable_value => $set_to, + variable_default => 1, + variable_description => "striker_0286", + variable_section => "system", + variable_source_uuid => $host_uuid, + variable_source_table => "hosts", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); + } + + # Record that it's been enabled. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message}); + $anvil->nice_exit({exit_code => 0}); + } + + # Read my variables. + my ($local_enabled, $variable_uuid, $mtime, $modified_date) = $anvil->Database->read_variable({ + debug => 2, + variable_name => "tool::anvil-safe-start::enabled", + variable_source_table => "hosts", + variable_source_uuid => $host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_enabled => $local_enabled, + variable_uuid => $variable_uuid, + }}); + # No UUID means the value hasn't been recorded, so we default to 1. + if (not $variable_uuid) + { + $local_enabled = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }}); + } + + # Have we just been asked for the status? + if ($anvil->data->{switches}{status}) + { + # Yes, check our peer as well. 
+ my ($peer_enabled, $variable_uuid, $mtime, $modified_date) = $anvil->Database->read_variable({ + variable_name => "tool::anvil-safe-start::enabled", + variable_source_table => "hosts", + variable_source_uuid => $peer_host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peer_enabled => $peer_enabled, + variable_uuid => $variable_uuid, + }}); + # No UUID means the value hasn't been recorded, so we default to 1. + if (not $variable_uuid) + { + $peer_enabled = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }}); + } + + # What we tell the user depends slightly on which nodes are enabled. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_enabled => $local_enabled, + peer_enabled => $peer_enabled, + }}); + + my $message = ""; + if (($local_enabled) && ($peer_enabled)) + { + # Both nodes are enabled. + $message = "message_0227"; + } + elsif ((not $local_enabled) && (not $peer_enabled)) + { + # Both nodes are disabled + $message = "message_0228"; + } + elsif ($local_enabled) + { + # We're enabled, the peer is disabled. + $message = "message_0229"; + } + else + { + # We're disabled, the peer is enabled. + $message = "message_0230"; + } + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message}); + $anvil->nice_exit({exit_code => 0}); + } + + if (not $local_enabled) + { + # Disabled. Forced? + if ($anvil->data->{switches}{force}) + { + # Forced, run anyway. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"}); + } + else + { + # Exit. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"}); + $anvil->nice_exit({exit_code => 0}); + } + } + + return(0); +} diff --git a/tools/scancore b/tools/scancore index 4d0c2505..0662999d 100755 --- a/tools/scancore +++ b/tools/scancore @@ -332,6 +332,15 @@ sub startup_tasks }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }}); + # If we've been up for less than ten minutes, call anvil-safe-start as a background process. It will + # exit if it is disabled. + my $uptime = $anvil->Get->uptime; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { uptime => $uptime }}); + if ($uptime < 600) + { + + } + return(0); }