2021-04-12 04:28:24 +00:00
|
|
|
#!/usr/bin/perl
|
|
|
|
#
|
|
|
|
# This does boot-time sanity checks on nodes and then, if all is well, joins the cluster and boots servers.
|
|
|
|
#
|
|
|
|
# NOTE: Unlike M2, this is controlled by scancore's start, but only if scancore starts up within ten minutes
|
|
|
|
# of the node itself booting. This way, stopping/starting scancore won't call us repeatedly. This tool
|
|
|
|
# is enabled or disabled via the 'tool::anvil-safe-start::enabled' variable tied to the 'hosts' ->
|
|
|
|
# 'host_uuid' table.
|
|
|
|
#
|
|
|
|
# Exit codes;
|
|
|
|
# 0 = Normal exit.
|
|
|
|
# 1 = Any problem that causes an early exit.
|
|
|
|
#
|
|
|
|
# TODO:
|
2021-04-23 04:04:20 +00:00
|
|
|
# - Add job support
|
2021-04-12 04:28:24 +00:00
|
|
|
# - Make this work on DR hosts.
|
2021-04-14 04:26:06 +00:00
|
|
|
# - 'pcs quorum unblock' could be useful in sole-survivor cold starts.
|
2021-05-14 03:27:38 +00:00
|
|
|
# - Start DRBD resources if the VMs are running already on the peer.
|
2023-06-23 00:36:09 +00:00
|
|
|
# - Check that the installed kernel-headers matches the running kernel and, if not, check with grubby to
|
|
|
|
# ensure the right kernel is set to boot. Then alert the user to a likely need to reboot.
|
2021-04-12 04:28:24 +00:00
|
|
|
#
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use Anvil::Tools;
|
2021-04-13 00:46:30 +00:00
|
|
|
use NetAddr::IP;
|
2021-04-12 04:28:24 +00:00
|
|
|
require POSIX;
|
|
|
|
|
|
|
|
# Work out this program's name and the directory it was called from. If '$0' has no directory part (ie:
# when called as 'perl anvil-safe-start'), the name falls back to '$0' itself and the directory to '.', so
# that neither variable is left undefined. The name is quoted ('\Q...\E') when used in the regex so that
# it is always matched literally.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
if (not defined $THIS_FILE)
{
	$THIS_FILE = $0;
}
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if (not defined $running_directory)
{
	$running_directory = ".";
}
if (($running_directory =~ m{\A\.(?:/|\z)}) && ($ENV{PWD}))
{
	# Only a bare '.' or a leading './' refers to the current directory. A leading '..' is left alone,
	# as swapping its first dot for the working directory would create an invalid path.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Read switches
$anvil->Get->switches({list => [], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});

# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
# NOTE: We only refuse to run when both the real and the effective UID are non-zero.
if (($< != 0) && ($> != 0))
{
	# Not root
	print $anvil->Words->string({key => "error_0005"})."\n";
	$anvil->nice_exit({exit_code => 1});
}

# If dnf is running, hold.
$anvil->System->wait_on_dnf();

# Connect to the database(s). If no connection is available, we wait (below) until there is one.
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"});

# If I have no databases, sleep until I do
if (not $anvil->data->{sys}{database}{connections})
{
	# Report that no database is available, then retry the connection every ten seconds until at least
	# one database is reachable. There is no timeout.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"});
	
	until($anvil->data->{sys}{database}{connections})
	{
		sleep 10;
		
		$anvil->refresh();
		$anvil->Database->connect();
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
		if (not $anvil->data->{sys}{database}{connections})
		{
			# Keep waiting
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, secure => 0, key => "log_0439"});
		}
	}
}

### Process
# 1. Check that this host is a node and that it is a member of an Anvil!; exit if not.
# 2. Can I reach my peer on all of the networks we share? Loop until true.
#    - Wait here indefinitely
# 3. Start pacemaker and wait until this node is ready and quorate.
# 4. Boot the servers.
# 5. Start the DRBD resources locally that the peer is waiting to connect to.

# Check to see if we should run. This exits if this host isn't a node, or isn't in an Anvil! yet.
prerun_checks($anvil);

# Wait until I can ping the peer on all networks. This will not return until access is available on all
# networks. There is no timeout.
wait_for_access($anvil);

# Start pacemaker now.
start_pacemaker($anvil);

# Boot servers.
boot_servers($anvil);

# Start DRBD resources locally for VMs running on the peer already
check_drbd($anvil);

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});

$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
|
|
|
#############################################################################################################
|
|
|
|
# Functions #
|
|
|
|
#############################################################################################################
|
|
|
|
|
2021-05-14 03:27:38 +00:00
|
|
|
# This reads the DRBD status locally and on the peer, then walks the connections of each resource the peer
# reports. If the peer's connection is in the 'Connecting' state, and our matching local connection is not
# 'StandAlone', the resource is brought up locally.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# This reads 'sys::peer_host_uuid', 'sys::peer_password' and 'sys::peer_target_ip' from the handle's data
# (in this file, these are set by prerun_checks() and wait_for_access()). If no peer IP was recorded, there
# is nothing to check and this returns right away.
# 
# Returns '0' in all cases.
sub check_drbd
{
	my ($anvil) = @_;
	
	# Find the servers running on the peer.
	my $short_host_name      = $anvil->Get->short_host_name();
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	my $peer_ip_address      = $anvil->data->{sys}{peer_target_ip};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		's1:short_host_name'      => $short_host_name,
		's2:peer_host_uuid'       => $peer_host_uuid,
		's3:peer_short_host_name' => $peer_short_host_name,
		's4:peer_password'        => $anvil->Log->is_secure($peer_password),
		's5:peer_ip_address'      => $peer_ip_address,
	}});
	
	# Without a peer IP, we can't ask the peer for its DRBD status (and would be using an undefined value
	# as a hash key below), so there is nothing for us to do.
	if (not $peer_ip_address)
	{
		return(0);
	}
	
	# Get the list of resources up on the peer.
	$anvil->DRBD->get_status({debug => 2});
	$anvil->DRBD->get_status({
		debug    => 2,
		password => $peer_password,
		target   => $peer_ip_address,
	});
	
	foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}})
	{
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource => $resource }});
		foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}})
		{
			# NOTE: 'peer_is_me' is only logged, it's not used to decide if the resource is started.
			my $peer_is_me = $anvil->Network->is_local({host => $peer_name});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:peer_name'  => $peer_name,
				's2:peer_is_me' => $peer_is_me,
			}});
			
			# Treat a missing state as an empty string so that the regex tests below never see
			# an undefined value.
			my $peer_connection_state = $anvil->data->{drbd}{status}{$peer_ip_address}{resource}{$resource}{connection}{$peer_name}{'connection-state'};
			   $peer_connection_state = "" if not defined $peer_connection_state;
			
			# Walk down to the local connection state one level at a time. Testing the full path
			# with a single 'exists' would create empty resource and connection entries in our
			# local status hash for resources we don't actually have.
			my $local_connection_state = "";
			my $local_resources        = $anvil->data->{drbd}{status}{$short_host_name}{resource};
			if ((ref($local_resources) eq "HASH") && (ref($local_resources->{$resource}) eq "HASH"))
			{
				my $local_connections = $local_resources->{$resource}{connection};
				if ((ref($local_connections) eq "HASH")                && 
				    (ref($local_connections->{$peer_name}) eq "HASH") && 
				    (defined $local_connections->{$peer_name}{'connection-state'}))
				{
					$local_connection_state = $local_connections->{$peer_name}{'connection-state'};
				}
			}
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:peer_connection_state'  => $peer_connection_state,
				's2:local_connection_state' => $local_connection_state,
			}});
			
			if (($peer_connection_state =~ /Connecting/i) && ($local_connection_state !~ /StandAlone/i))
			{
				# Start the DRBD resource locally.
				my $return_code = $anvil->DRBD->manage_resource({
					debug    => 2,
					resource => $resource,
					task     => "up",
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
			}
		}
	}
	
	return(0);
}
|
|
|
|
|
2021-04-14 04:26:06 +00:00
|
|
|
# This asks 'anvil-boot-server' to boot all servers.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# If the call returns a non-zero return code, the output and return code are logged as an error and this
# program exits with the exit code '1'. Otherwise, '0' is returned.
sub boot_servers
{
	my ($anvil) = @_;
	
	# Tell the user what we're about to do.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0614"});
	
	# Build the 'anvil-boot-server --server all' call, passing along our own logging switches.
	my $boot_program = $anvil->data->{path}{exe}{'anvil-boot-server'};
	my $command      = $boot_program." --server all".$anvil->Log->switches;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $command }});
	
	my ($boot_output, $boot_rc) = $anvil->System->call({shell_call => $command});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		output      => $boot_output,
		return_code => $boot_rc,
	}});
	
	# A return code of '0' means the call worked, so we're done here.
	return(0) if not $boot_rc;
	
	# What?! Fail out, we're done.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0275", variables => {
		output      => $boot_output,
		return_code => $boot_rc,
	}});
	$anvil->nice_exit({exit_code => 1});
	
	return(0);
}
|
|
|
|
|
|
|
|
# Start pacemaker (if needed) and wait until this node is ready and quorate.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# The steps are;
# 1. Try to parse the CIB. If that works, pacemaker is taken to be running already and we return.
# 2. Wait (with no timeout) until our short host name resolves to an IP address held by one of our own 
#    interfaces.
# 3. Call 'pcs cluster start', retrying every ten seconds. If it still fails after 120 seconds, the error
#    is logged and this program exits with the exit code '1'.
# 4. Every five seconds, check the CIB and the quorum state until this node reports 'ready' and is 
#    quorate. There is no timeout on this step.
# 
# Returns '0'.
sub start_pacemaker
{
	my ($anvil) = @_;
	
	my $anvil_uuid           = $anvil->data->{sys}{anvil_uuid};
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $fenced_peer          = 0;	# Not used yet, see the fencing TODO below.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		anvil_uuid           => $anvil_uuid,
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
	}});
	
	# Is pacemaker already running? If the CIB parses without a problem, there is nothing to start.
	my ($cib_problem) = $anvil->Cluster->parse_cib({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $cib_problem }});
	return(0) if not $cib_problem;
	
	# Nope, start it.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"});
	
	# NOTE: In some odd cases, this can try to run before /etc/hosts has been populated. So wait until 
	#       we can access ourself.
	my $resolved = 0;
	while (not $resolved)
	{
		# Convert our short host name to an IP and verify that the IP is one of ours.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0681"});
		my $local_bcn1_ip = $anvil->Convert->host_name_to_ip({debug => 2, host_name => $short_host_name});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_bcn1_ip => $local_bcn1_ip }});
		
		if ($local_bcn1_ip)
		{
			# The name resolved to something. Clear out any old network data and re-read our IPs
			# to see if the address really is one of ours.
			delete $anvil->data->{network}{$short_host_name} if exists $anvil->data->{network}{$short_host_name};
			$anvil->Network->get_ips();
			
			foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
			{
				# Ignore interfaces without an IP, and loopback addresses.
				my $this_ip = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
				next if $this_ip eq "";
				next if $this_ip =~ /^127\.0\.0\./;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					's1:interface' => $interface,
					's2:this_ip'   => $this_ip,
				}});
				next if $this_ip ne $local_bcn1_ip;
				
				# Found it, our name points at one of our own interfaces.
				$resolved = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ok => $resolved }});
				last;
			}
		}
		
		unless ($resolved)
		{
			# Sleep 10 seconds.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "warning_0134"});
			sleep 10;
		}
	}
	
	### TODO: A lot more testing is needed for degraded single-node start later.
	###       Should we use --all, or wait for our peer? For now, we wait.
	### NOTE: This can be racy during initial setup, calling the start before /etc/hosts is populated. 
	###       So this retries the start for up to two minutes to handle that corner case.
	my $start_deadline = time + 120;
	my $starting       = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		wait_until => $start_deadline,
		waiting    => $starting,
	}});
	while ($starting)
	{
		# The alternative call would be 'pcs cluster start --all'. For now, we only start ourself.
		my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
			output      => $output,
			return_code => $return_code,
		}});
		
		if (not $return_code)
		{
			# Success!
			$starting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $starting }});
		}
		elsif (time > $start_deadline)
		{
			# The start is still failing and we're out of time. We're done.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0256", variables => {
				output      => $output,
				return_code => $return_code,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
		else
		{
			# Report the error and sleep
			my $time_left = $start_deadline - time;
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "alert", key => "warning_0153", variables => {
				output      => $output,
				return_code => $return_code,
				time_left   => $time_left,
				waiting     => 10,
			}});
			sleep 10;
		}
	}
	
	### TODO: We may implement the logic to fence our peer (similar to cman's 'post_join_delay' logic)
	###       at a later time. For now, we'll wait forever for this to exit. This is why we set 
	###       'wait_for_peer', even though it's not used yet.
	# The idea is to wait up to two minutes for the cluster to start and, if it's not up by then, fence
	# the peer and, if the fence succeeds, unblock quorum.
	my $start_time    = time;
	my $wait_for_peer = $start_time + 120;
	my $joining       = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		start_time    => $start_time,
		wait_for_peer => $wait_for_peer,
	}});
	while ($joining)
	{
		# Assume we're done. Any check below that isn't satisfied sets this back to '1'.
		$joining = 0;
		
		my ($parse_problem) = $anvil->Cluster->parse_cib({debug => 2});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $parse_problem }});
		
		# Quorum, as reported in the CIB, sets 'have-quorum' to '1' as soon as it starts, then 
		# retracts it. For this reason, we use 'parse_quorum()' to get the quorum directly from 
		# corosync/votequorum. It is only called once the CIB can be parsed.
		my $quorum_problem = 0;
		if (not $parse_problem)
		{
			($quorum_problem) = $anvil->Cluster->parse_quorum({debug => 2});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $quorum_problem }});
		}
		
		if (($parse_problem) or ($quorum_problem))
		{
			# Either we can't parse the CIB yet, or corosync is down. Wait.
			$joining = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $joining }});
		}
		else
		{
			### NOTE: We don't worry about maintenance mode yet, as it shouldn't apply, but we 
			###       may change that view later.
			# See where we are.
			my $node_name        = $anvil->data->{cib}{parsed}{'local'}{name};
			my $maintenance_mode = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'maintenance-mode'};
			my $in_ccm           = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm};
			my $crmd             = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd};
			my $join             = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'};
			my $ready            = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready};
			my $quorate          = $anvil->data->{quorum}{quorate};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:node_name'        => $node_name,
				's2:maintenance_mode' => $maintenance_mode,
				's3:in_ccm/crmd/join' => $in_ccm."/".$crmd."/".$join,
				's4:ready'            => $ready,
				's5:quorate'          => $quorate,
			}});
			
			if (($ready) && ($quorate))
			{
				# We're online and we have quorum, we're done waiting.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0611", variables => { node_name => $node_name }});
			}
			elsif ($ready)
			{
				# We're ready, but we don't have quorum yet.
				$joining = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $joining }});
				
				# Keep waiting, or fence the peer?
				if (time > $wait_for_peer)
				{
					### TODO: See above, not implemented yet. Do we want to do this? If so:
					# Time to fence. Use 'pcs stonith fence <peer>', verify it succeeded,
					# then do 'pcs quorum unblock --force' to finish startup.
				}
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0610", variables => { node_name => $node_name }});
			}
			else
			{
				# Not ready yet.
				$joining = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $joining }});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0612", variables => {
					node_name => $node_name,
					in_ccm    => $in_ccm,
					crmd      => $crmd,
					'join'    => $join,
				}});
			}
		}
		
		if ($joining)
		{
			# Pause before checking again.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0609"});
			sleep 5;
		}
	}
	
	return(0);
}
|
|
|
|
|
2021-04-13 00:46:30 +00:00
|
|
|
# Check for which networks we have and verify that we can access our peer on each. This function will not
# return until the peer is accessible over every network that both we and the peer are on.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# This reads 'sys::peer_host_uuid' and 'sys::peer_password' from the handle's data (in this file, both are
# set by prerun_checks()). Each time access to the peer is confirmed, the peer's IP on that network is 
# stored in 'sys::peer_target_ip', so the value left at the end is the last IP that was tested.
# 
# Interfaces whose IP and subnet mask can't be parsed into a network are skipped.
# 
# Returns '0'.
sub wait_for_access
{
	my ($anvil) = @_;
	
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	my $peer_password        = $anvil->data->{sys}{peer_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
		peer_password        => $anvil->Log->is_secure($peer_password),
	}});
	
	my $waiting = 1;
	while ($waiting)
	{
		# This will get set back to '1' if access to the peer fails on any shared network.
		$waiting = 0;
		
		# Load IPs (again, to catch changes that might be delaying startup)
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $short_host_name,
			host_uuid => $host_uuid,
		});
		$anvil->Network->load_ips({
			clear     => 1,
			host      => $peer_short_host_name,
			host_uuid => $peer_host_uuid,
		});
		
		# Loop through our interfaces and then loop our peers. Test access over them and set
		# 'waiting' back to '1' if the connection fails.
		foreach my $interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$short_host_name}{interface}})
		{
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				interface => $interface,
				waiting   => $waiting,
			}});
			
			# Once access has failed on one network, skip the rest. We'll sleep and then test all
			# of the networks again.
			next if $waiting;
			
			# Only care about our networks.
			if (not $anvil->Network->is_our_interface({interface => $interface}))
			{
				# Not an interface we care about
				next;
			}
			
			my $this_network = ($interface =~ /^(.*?)_/)[0];
			my $ip_address   = $anvil->data->{network}{$short_host_name}{interface}{$interface}{ip};
			my $subnet_mask  = $anvil->data->{network}{$short_host_name}{interface}{$interface}{subnet_mask};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
				's1:this_network' => $this_network,
				's2:ip_address'   => $ip_address,
				's3:subnet_mask'  => $subnet_mask,
			}});
			
			### NOTE: I know I could match interface names, but that's not certain enough. It's
			###       possible (if unlikely) that the network name+number differs on our peer. So
			###       this is safer.
			# 'NetAddr::IP->new()' returns 'undef' if it can't parse what it was given (ie: the 
			# interface has no IP or subnet mask). Without a local network to compare against, 
			# there's nothing to test on this interface.
			my $local_network = NetAddr::IP->new($ip_address."/".$subnet_mask);
			if (not defined $local_network)
			{
				next;
			}
			
			# Loop through my peer's interfaces and see if we're sharing this one.
			my $peer_match_found = 0;
			foreach my $peer_interface (sort {$a cmp $b} keys %{$anvil->data->{network}{$peer_short_host_name}{interface}})
			{
				last if $peer_match_found;
				my $peer_ip_address  = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{ip};
				my $peer_subnet_mask = $anvil->data->{network}{$peer_short_host_name}{interface}{$peer_interface}{subnet_mask};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
					peer_interface   => $peer_interface,
					peer_ip_address  => $peer_ip_address,
					peer_subnet_mask => $peer_subnet_mask,
				}});
				
				# This the matching network?
				next if $subnet_mask ne $peer_subnet_mask;
				
				# As above, skip peer interfaces that can't be parsed instead of calling a 
				# method on an undefined value (which would be fatal).
				my $peer_network = NetAddr::IP->new($peer_ip_address."/".$peer_subnet_mask);
				next if not defined $peer_network;
				
				if ($peer_network->within($local_network))
				{
					# Match, test access.
					$peer_match_found = 1;
					my $access = $anvil->Remote->test_access({
						target   => $peer_ip_address,
						password => $peer_password,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
					if ($access)
					{
						# This network is good.
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0604", variables => {
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address,
						}});
						
						# Record the IP so that later steps know how to reach the peer.
						$anvil->data->{sys}{peer_target_ip} = $peer_ip_address;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
							"sys::peer_target_ip" => $anvil->data->{sys}{peer_target_ip},
						}});
					}
					else
					{
						# No access, wait and try it again.
						$waiting = 1;
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0605", variables => {
							peer    => $peer_short_host_name,
							network => $this_network,
							peer_ip => $peer_ip_address,
						}});
					}
				}
			}
		}
		
		if ($waiting)
		{
			# At least one network isn't up yet, pause and then check again.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0606", variables => { peer => $peer_short_host_name }});
			sleep 5;
		}
	}
	
	# All networks are up.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0607", variables => { peer => $peer_short_host_name }});
	
	return(0);
}
|
|
|
|
|
2021-04-12 04:28:24 +00:00
|
|
|
# This checks to verify that we're a node, and if so, if this tool is enabled. If it's disabled or this isn't
|
|
|
|
# a node, this method will exit.
|
|
|
|
sub prerun_checks
|
|
|
|
{
|
|
|
|
my ($anvil) = @_;
|
|
|
|
|
|
|
|
$anvil->Database->get_hosts();
|
|
|
|
$anvil->Database->get_anvils();
|
|
|
|
|
|
|
|
my $host_uuid = $anvil->Get->host_uuid();
|
|
|
|
my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
host_uuid => $host_uuid,
|
|
|
|
host_type => $host_type,
|
|
|
|
}});
|
|
|
|
|
2023-07-25 02:32:41 +00:00
|
|
|
if (($host_type eq "node") or ($host_type eq "dr"))
|
|
|
|
{
|
|
|
|
# Call DRBD->get_status because, if we're just starting up and the kernel module needs to be
|
|
|
|
# built, do it before we start calling scan agents.
|
|
|
|
$anvil->DRBD->get_status({debug => 2});
|
|
|
|
}
|
|
|
|
|
2021-04-12 04:28:24 +00:00
|
|
|
if ($host_type ne "node")
|
|
|
|
{
|
|
|
|
# We're done.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0598"});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
|
|
|
|
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
|
|
|
|
|
|
|
|
if (not $anvil_uuid)
|
|
|
|
{
|
|
|
|
# This is a node, but not in an Anvil! yet.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0603"});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
|
|
|
|
my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
|
|
|
|
my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
|
2021-04-13 00:46:30 +00:00
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
node1_host_uuid => $node1_host_uuid,
|
|
|
|
node2_host_uuid => $node2_host_uuid,
|
|
|
|
}});
|
|
|
|
|
2021-04-14 04:26:06 +00:00
|
|
|
$anvil->data->{sys}{anvil_uuid} = $anvil_uuid;
|
2021-04-13 00:46:30 +00:00
|
|
|
$anvil->data->{sys}{peer_host_uuid} = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
|
|
|
|
$anvil->data->{sys}{peer_password} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
2021-04-14 04:26:06 +00:00
|
|
|
"sys::anvil_uuid" => $anvil->data->{sys}{anvil_uuid},
|
2021-04-13 00:46:30 +00:00
|
|
|
"sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid},
|
|
|
|
"sys::peer_password" => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}),
|
|
|
|
}});
|
2021-04-12 04:28:24 +00:00
|
|
|
|
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells them to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
|
|
|
# We don't use this anymore, it's managed by the 'anvil-safe-start.service' daemon.
|
|
|
|
=cut
|
2021-04-12 04:28:24 +00:00
|
|
|
# Are we being asked to enable or disable?
|
|
|
|
my $nodes = [$host_uuid];
|
|
|
|
my $set_to = 1;
|
|
|
|
my $message = "";
|
|
|
|
if ($anvil->data->{switches}{enable})
|
|
|
|
{
|
|
|
|
# We're enabling, which message will we use?
|
|
|
|
$message = $anvil->data->{switches}{'local'} ? "log_0599" : "log_0600";
|
|
|
|
}
|
|
|
|
elsif ($anvil->data->{switches}{disable})
|
|
|
|
{
|
|
|
|
# We're disabling. Which message?
|
|
|
|
$set_to = 0;
|
|
|
|
$message = $anvil->data->{switches}{'local'} ? "log_0601" : "log_0602";
|
|
|
|
}
|
|
|
|
|
|
|
|
# If we're updating the settings, do so and then exit.
|
|
|
|
if ($message)
|
|
|
|
{
|
|
|
|
if (not $anvil->data->{switches}{'local'})
|
|
|
|
{
|
|
|
|
# Add our peer as well.
|
2021-04-13 00:46:30 +00:00
|
|
|
push @{$nodes}, $anvil->data->{sys}{peer_host_uuid};
|
2021-04-12 04:28:24 +00:00
|
|
|
}
|
|
|
|
foreach my $host_uuid (@{$nodes})
|
|
|
|
{
|
|
|
|
my ($variable_uuid) = $anvil->Database->insert_or_update_variables({
|
2021-04-13 00:46:30 +00:00
|
|
|
debug => 3,
|
2021-04-12 04:28:24 +00:00
|
|
|
variable_name => "tool::anvil-safe-start::enabled",
|
|
|
|
variable_value => $set_to,
|
|
|
|
variable_default => 1,
|
|
|
|
variable_description => "striker_0286",
|
|
|
|
variable_section => "system",
|
|
|
|
variable_source_uuid => $host_uuid,
|
|
|
|
variable_source_table => "hosts",
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Record that the setting has been enabled or disabled.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Read my variables.
|
|
|
|
my ($local_enabled, $variable_uuid, $mtime, $modified_date) = $anvil->Database->read_variable({
|
2021-04-13 00:46:30 +00:00
|
|
|
debug => 3,
|
2021-04-12 04:28:24 +00:00
|
|
|
variable_name => "tool::anvil-safe-start::enabled",
|
|
|
|
variable_source_table => "hosts",
|
|
|
|
variable_source_uuid => $host_uuid,
|
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
local_enabled => $local_enabled,
|
|
|
|
variable_uuid => $variable_uuid,
|
|
|
|
}});
|
|
|
|
# No UUID means the value hasn't been recorded, so we default to 1.
|
|
|
|
if (not $variable_uuid)
|
|
|
|
{
|
|
|
|
$local_enabled = 1;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_enabled => $local_enabled }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# Have we just been asked for the status?
|
|
|
|
if ($anvil->data->{switches}{status})
|
|
|
|
{
|
|
|
|
# Yes, check our peer as well.
|
|
|
|
my ($peer_enabled, $variable_uuid, $mtime, $modified_date) = $anvil->Database->read_variable({
|
2021-04-13 00:46:30 +00:00
|
|
|
debug => 3,
|
2021-04-12 04:28:24 +00:00
|
|
|
variable_name => "tool::anvil-safe-start::enabled",
|
|
|
|
variable_source_table => "hosts",
|
2021-04-13 00:46:30 +00:00
|
|
|
variable_source_uuid => $anvil->data->{sys}{peer_host_uuid},
|
2021-04-12 04:28:24 +00:00
|
|
|
});
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
peer_enabled => $peer_enabled,
|
|
|
|
variable_uuid => $variable_uuid,
|
|
|
|
}});
|
|
|
|
# No UUID means the value hasn't been recorded, so we default to 1.
|
|
|
|
if (not $variable_uuid)
|
|
|
|
{
|
|
|
|
$peer_enabled = 1;
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_enabled => $peer_enabled }});
|
|
|
|
}
|
|
|
|
|
|
|
|
# What we tell the user depends slightly on which nodes are enabled.
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
|
|
|
|
local_enabled => $local_enabled,
|
|
|
|
peer_enabled => $peer_enabled,
|
|
|
|
}});
|
|
|
|
|
|
|
|
my $message = "";
|
|
|
|
if (($local_enabled) && ($peer_enabled))
|
|
|
|
{
|
|
|
|
# Both nodes are enabled.
|
|
|
|
$message = "message_0227";
|
|
|
|
}
|
|
|
|
elsif ((not $local_enabled) && (not $peer_enabled))
|
|
|
|
{
|
|
|
|
# Both nodes are disabled
|
|
|
|
$message = "message_0228";
|
|
|
|
}
|
|
|
|
elsif ($local_enabled)
|
|
|
|
{
|
|
|
|
# We're enabled, the peer is disabled.
|
|
|
|
$message = "message_0229";
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
# We're disabled, the peer is enabled.
|
|
|
|
$message = "message_0230";
|
|
|
|
}
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells them to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
|
|
|
=cut
|
2021-04-12 04:28:24 +00:00
|
|
|
|
2021-04-13 00:46:30 +00:00
|
|
|
# Is another instance running?
|
|
|
|
my $pids = $anvil->System->pids({
|
|
|
|
debug => 3,
|
|
|
|
ignore_me => 1,
|
|
|
|
program_name => $THIS_FILE,
|
|
|
|
});
|
|
|
|
my $other_instances = @{$pids};
|
|
|
|
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { other_instances => $other_instances }});
|
|
|
|
|
|
|
|
if ($other_instances)
|
|
|
|
{
|
2023-07-23 00:03:39 +00:00
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0233", variables => { program => $THIS_FILE }});
|
|
|
|
sleep 2;
|
2021-04-13 00:46:30 +00:00
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
|
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells them to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
|
|
|
=cut
|
2021-04-13 00:46:30 +00:00
|
|
|
# Last test, enabled or forced?
|
2021-04-12 04:28:24 +00:00
|
|
|
if (not $local_enabled)
|
|
|
|
{
|
|
|
|
# Disabled. Forced?
|
|
|
|
if ($anvil->data->{switches}{force})
|
|
|
|
{
|
|
|
|
# Forced, run anyway.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0232"});
|
2021-04-13 00:46:30 +00:00
|
|
|
return(0);
|
2021-04-12 04:28:24 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
# Exit.
|
|
|
|
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "message_0231"});
|
|
|
|
$anvil->nice_exit({exit_code => 0});
|
|
|
|
}
|
|
|
|
}
|
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells them to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
|
|
|
=cut
|
2021-04-12 04:28:24 +00:00
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|