#!/usr/bin/perl
# 
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers.
# 
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes
#       of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool
#       is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' -> 
#       'host_uuid' table.
# 
# Exit codes;
# 0 = Normal exit.
# 1 = Any problem that causes an early exit.
# 
# TODO: 
# - Make this work on DR hosts.
# 

use strict;
use warnings;
use Anvil::Tools;
use NetAddr::IP;
require POSIX;

# Work out our program name and the directory we were run from. When '$0' has no directory component (for
# example, when run as 'perl anvil-safe-start'), the first regex doesn't match, so we fall back to '$0' 
# itself and to the current directory. Without this, '$THIS_FILE' would be undefined everywhere it is used
# below (log source, program name).
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if (not defined $THIS_FILE)
{
	$THIS_FILE = $0;
}
# The program name is quoted so that characters like '.' in it are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Convert a relative directory into an absolute one.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# Connect to the database(s). If there are no connections yet, we don't exit; the loop further down waits
# until at least one database is available.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# Set the default (unset) value for every switch we understand, then read the command line so that anything
# the user passed overrides these defaults.
# NOTE(review): The switches were already read once right after 'Anvil::Tools->new()'. Confirm whether that
#               earlier call is still needed before removing either one.
$anvil->data->{switches}{disable} = "";
$anvil->data->{switches}{enable}  = "";
$anvil->data->{switches}{force}   = "";
$anvil->data->{switches}{'local'} = "";
$anvil->data->{switches}{status}  = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections})
{
	# Tell the user once, then quietly retry every ten seconds until we have at least one connection.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});
	until($anvil->data->{sys}{database}{connections})
	{
		sleep 10;
		
		# Refresh before reconnecting, then try the database(s) again.
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Keep waiting
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"});
		}
	}
}

### Process
# 1. Check if I am enabled and that no other copies are running.
# 2. Can I ping my peer on all three networks? Loop until true.
#    - Wait here indefinitely
# 3. ...
# 6. Using Start Groups/Delays (and ignoring 'clean' off VMs), boot servers.

# Check to see if we should run. Also checks/sets enable/disable requests.
prerun_checks($anvil);

# Wait until I can ping the peer on all three networks. This will not return until access is available on all 
# networks. There is no timeout.
wait_for_access($anvil);

$anvil->nice_exit({exit_code => 0});


#############################################################################################################
# Functions                                                                                                 #
#############################################################################################################

# Check for which networks we have and verify that we can reach our peer on each. This function will not 
# return until access has been confirmed on every network we share with the peer, and on at least one 
# network. There is no timeout.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle. 'sys::peer_host_uuid' and 'sys::peer_password' must already be set, 
#           which 'prerun_checks()' does.
# 
# Returns '0' once all shared networks are reachable.
sub wait_for_access
{
	my ($anvil) = @_;
	
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		host_uuid            => $host_uuid, 
		short_host_name      => $short_host_name, 
		peer_host_uuid       => $peer_host_uuid, 
		peer_short_host_name => $peer_short_host_name, 
		peer_password        => $anvil->Log->is_secure($peer_password), 
	}});
	
	my $waiting = 1;
	while ($waiting)
	{
		# Assume success. This will get set back to '1' if any network fails, or if we find no shared
		# network to test at all.
		$waiting = 0;
		my $networks_tested = 0;
		
		# Load IPs (again, to catch changes that might be delaying startup)
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $short_host_name,
			host_uuid => $host_uuid,
		});
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $peer_short_host_name,
			host_uuid => $peer_host_uuid,
		});
		
		# Loop through our interfaces and then loop our peers. Test access over them and set 
		# 'waiting' back to '1' if the connection fails.
		foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				interface => $interface,
				waiting   => $waiting,
			}});
			
			# Once any network has failed we're going to sleep and retry anyway, so don't bother
			# testing the remaining interfaces on this pass.
			next if $waiting;
			
			# Only care about our networks.
			if (($interface !~ /^bcn/) && ($interface !~ /^sn/) && ($interface !~ /^ifn/))
			{
				# Not an interface we care about
				next;
			}
			
			# The network name is the part of the interface name before the first underscore. If 
			# there is no underscore, use the interface name as-is so the log messages below still
			# get a defined value.
			my $this_network = ($interface =~ /^(.*?)_/)[0];
			   $this_network = $interface if not defined $this_network;
			my $ip_address   = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
			my $subnet_mask  = $anvil->data->{network}{$short_host_name}{interface}{$interface}{subnet_mask};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:this_network' => $this_network,
				's2:ip_address'   => $ip_address,
				's3:subnet_mask'  => $subnet_mask, 
			}});
			
			# Without an IP and subnet mask there is nothing to compare or test on this interface.
			if ((not $ip_address) or (not $subnet_mask))
			{
				next;
			}
			
			### NOTE: I know I could match interface names, but that's not certain enough. It's 
			###       possible (if unlikely) that the network name+number differs on our peer. So
			###       this is safer.
			# Loop through my peer's interfaces and see if we're sharing this one. 
			# 'NetAddr::IP->new()' returns 'undef' if it can't parse the address, so check for that.
			my $local_network = NetAddr::IP->new($ip_address."/".$subnet_mask);
			next if not defined $local_network;
			
			my $peer_match_found = 0;
			foreach my $peer_interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$peer_short_host_name}{interface}})
			{
				# One matching peer interface per local network is enough.
				last if $peer_match_found;
				my $peer_ip_address  = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{ip};
				my $peer_subnet_mask = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{subnet_mask};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					peer_interface   => $peer_interface,
					peer_ip_address  => $peer_ip_address,
					peer_subnet_mask => $peer_subnet_mask, 
				}});
				
				# Skip peer interfaces that have no usable address.
				if ((not $peer_ip_address) or (not $peer_subnet_mask))
				{
					next;
				}
				
				# Is this the matching network?
				next if $subnet_mask ne $peer_subnet_mask;
				
				my $peer_network = NetAddr::IP->new($peer_ip_address."/".$peer_subnet_mask);
				next if not defined $peer_network;
				
				if ($peer_network->within($local_network))
				{
					# Match, test access.
					$peer_match_found = 1;
					$networks_tested++;
					
					my $access = $anvil->Remote->test_access({
						target   => $peer_ip_address,
						password => $peer_password,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
					
					if ($access)
					{
						# This network is good.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0604", variables => { 
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address, 
						}});
					}
					else
					{
						# No access, wait and try it again.
						$waiting = 1;
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0605", variables => { 
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address, 
						}});
					}
				}
			}
		}
		
		# If we didn't test a single network (no interfaces loaded yet, or none shared with the peer),
		# we have not confirmed access to anything. Don't report success; wait and look again.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			networks_tested => $networks_tested,
			waiting         => $waiting,
		}});
		if ((not $waiting) && (not $networks_tested))
		{
			$waiting = 1;
		}
		
		if ($waiting)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0606", variables => { peer => $peer_short_host_name }});
			sleep 5;
		}
	}
	
	# All networks are up.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0607", variables => { peer => $peer_short_host_name }});
	
	return(0);
}

# This checks to verify that we're a node in an Anvil!, handles the enable / disable / status requests, and
# then checks that this tool is enabled and that no other copy is running.
# 
# This will exit the program (exit code '0') when; 
# - This host is not a node, or is a node that is not in an Anvil! yet.
# - '--enable' or '--disable' was used (after recording the new setting for this node and, unless '--local'
#   was used, for the peer as well).
# - '--status' was used (after reporting the state of both nodes).
# - Another instance of this program is already running.
# - This tool is disabled on this node and '--force' was not used.
# 
# On the way, this sets 'sys::peer_host_uuid' and 'sys::peer_password' for later use.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# Returns '0' if the caller should proceed.
sub prerun_checks
{
	my ($anvil) = @_;
	
	$anvil->Database->get_hosts();
	$anvil->Database->get_anvils();
	
	my $host_uuid = $anvil->Get->host_uuid();
	my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		host_uuid => $host_uuid,
		host_type => $host_type, 
	}});
	
	# A host with no recorded type is treated the same as any other non-node host.
	if ((not defined $host_type) or ($host_type ne "node"))
	{
		# We're done.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	if (not $anvil_uuid)
	{
		# This is a node, but not in an Anvil! yet.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		node1_host_uuid => $node1_host_uuid,
		node2_host_uuid => $node2_host_uuid, 
	}});
	
	# Our peer is whichever of the two nodes we are not.
	$anvil->data->{sys}{peer_host_uuid} = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
	$anvil->data->{sys}{peer_password}  = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid},
		"sys::peer_password"  => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}), 
	}});
	
	# Are we being asked to enable or disable? If both switches are given, 'enable' wins.
	my $nodes   = [$host_uuid];
	my $set_to  = 1;
	my $message = "";
	if ($anvil->data->{switches}{enable})
	{
		# We're enabling, which message will we use?
		$message = $anvil->data->{switches}{'local'} ? "log_0599" : "log_0600";
	}
	elsif ($anvil->data->{switches}{disable})
	{
		# We're disabling. Which message?
		$set_to  = 0;
		$message = $anvil->data->{switches}{'local'} ? "log_0601" : "log_0602";
	}
	
	# If we're updating the settings, do so and then exit.
	if ($message)
	{
		if (not $anvil->data->{switches}{'local'})
		{
			# Add our peer as well.
			push @{$nodes}, $anvil->data->{sys}{peer_host_uuid};
		}
		foreach my $node_host_uuid (@{$nodes})
		{
			my ($variable_uuid) = $anvil->Database->insert_or_update_variables({
				debug                 => 3,
				variable_name         => "tool::anvil-safe-start::enabled", 
				variable_value        => $set_to, 
				variable_default      => 1, 
				variable_description  => "striker_0286", 
				variable_section      => "system", 
				variable_source_uuid  => $node_host_uuid, 
				variable_source_table => "hosts", 
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
		}
		
		# Record that it's been enabled or disabled.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Read my variables. Only the value and the variable's UUID are used here.
	my ($local_enabled, $variable_uuid) = $anvil->Database->read_variable({
		debug                 => 3,
		variable_name         => "tool::anvil-safe-start::enabled", 
		variable_source_table => "hosts", 
		variable_source_uuid  => $host_uuid, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		local_enabled => $local_enabled,
		variable_uuid => $variable_uuid, 
	}});
	# No UUID means the value hasn't been recorded, so we default to 1.
	if (not $variable_uuid)
	{
		$local_enabled = 1;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }});
	}
	
	# Have we just been asked for the status?
	if ($anvil->data->{switches}{status})
	{
		# Yes, check our peer as well.
		my ($peer_enabled, $peer_variable_uuid) = $anvil->Database->read_variable({
			debug                 => 3,
			variable_name         => "tool::anvil-safe-start::enabled", 
			variable_source_table => "hosts", 
			variable_source_uuid  => $anvil->data->{sys}{peer_host_uuid}, 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			peer_enabled  => $peer_enabled,
			variable_uuid => $peer_variable_uuid, 
		}});
		# No UUID means the value hasn't been recorded, so we default to 1.
		if (not $peer_variable_uuid)
		{
			$peer_enabled = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }});
		}
		
		# What we tell the user slightly depends on which nodes are enabled.
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			local_enabled => $local_enabled,
			peer_enabled  => $peer_enabled, 
		}});
		my $status_key = "";
		if (($local_enabled) && ($peer_enabled))
		{
			# Both nodes are enabled.
			$status_key = "message_0227";
		}
		elsif ((not $local_enabled) && (not $peer_enabled))
		{
			# Both nodes are disabled
			$status_key = "message_0228";
		}
		elsif ($local_enabled)
		{
			# We're enabled, the peer is disabled.
			$status_key = "message_0229";
		}
		else
		{
			# We're disabled, the peer is enabled.
			$status_key = "message_0230";
		}
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $status_key});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Is another instance running?
	my $pids = $anvil->System->pids({
		debug        => 3,
		ignore_me    => 1, 
		program_name => $THIS_FILE,
	});
	my $other_instances = @{$pids};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});
	if ($other_instances)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233"});
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Last test, enabled or forced?
	if (not $local_enabled)
	{
		# Disabled. Forced?
		if ($anvil->data->{switches}{force})
		{
			# Forced, run anyway.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"});
			return(0);
		}
		else
		{
			# Exit.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"});
			$anvil->nice_exit({exit_code => 0});
		}
	}
	
	return(0);
}