2021-04-18 23:54:58 +00:00
#!/usr/bin/perl
#
# This does shutdown-time tasks; migrate or stop servers, withdraw and power off the host.
#
# Exit codes;
# 0 = Normal exit.
# 1 = Any problem that causes an early exit.
#
# TODO:
#
2022-09-22 03:35:06 +00:00
# BUG:
# - --poweroff when the peer is offline tries to migrate anyway.
# -
2021-04-18 23:54:58 +00:00
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
2021-04-23 04:04:20 +00:00
use Data::Dumper;
2021-04-18 23:54:58 +00:00
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
my $anvil = Anvil::Tools->new();
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
# Read switches (target ([user@]host[:port]) and the file with the target's password.
$anvil->Get->switches({list => [
"no-db",
"poweroff",
"power-off",
"stop-reason",
"stop-servers"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
2021-04-23 04:04:20 +00:00
2021-08-07 18:01:14 +00:00
# Let 'poweroff' work as a mis-spell of 'power-off'
if (($anvil->data->{switches}{'poweroff'}) && (not $anvil->data->{switches}{'power-off'}))
{
$anvil->data->{switches}{'power-off'} = $anvil->data->{switches}{'poweroff'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::power-off' => $anvil->data->{switches}{'power-off'},
}});
}
2021-04-23 15:51:28 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
2021-04-18 23:54:58 +00:00
# Make sure we're running as 'root'
# $< == real UID, $> == effective UID
if (($< != 0) && ($> != 0))
{
# Not root
print $anvil->Words->string({key => "error_0005"})."\n";
$anvil->nice_exit({exit_code => 1});
}
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
if ($anvil->data->{switches}{'no-db'})
2021-04-18 23:54:58 +00:00
{
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->data->{sys}{database}{connections} = 0;
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'sys::database::connections' => $anvil->data->{sys}{database}{connections},
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
}});
}
else
{
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
# again after we exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
2021-04-23 04:04:20 +00:00
}
# If we still don't have a job-uuit, go into interactive mode.
if ($anvil->data->{switches}{'job-uuid'})
{
# Load the job data.
$anvil->Job->clear();
$anvil->Job->get_job_details();
$anvil->Job->update_progress({
progress => 1,
job_picked_up_by => $$,
job_picked_up_at => time,
message => "message_0235",
});
2021-04-18 23:54:58 +00:00
2021-04-23 04:04:20 +00:00
# Pull out the job data.
foreach my $line (split/\n/, $anvil->data->{jobs}{job_data})
2021-04-18 23:54:58 +00:00
{
2021-04-23 04:04:20 +00:00
if ($line =~ /power-off=(.*?)$/)
{
$anvil->data->{switches}{'power-off'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::power-off' => $anvil->data->{switches}{'power-off'},
}});
2022-02-25 02:56:02 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0691!#" }});
2021-04-23 04:04:20 +00:00
}
if ($line =~ /stop-reason=(.*?)$/)
{
$anvil->data->{switches}{'stop-reason'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::stop-reason' => $anvil->data->{switches}{'stop-reason'},
}});
}
if ($line =~ /stop-servers=(.*?)$/)
2021-04-18 23:54:58 +00:00
{
2021-04-23 04:04:20 +00:00
$anvil->data->{switches}{'stop-servers'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::stop-servers' => $anvil->data->{switches}{'stop-servers'},
}});
2021-04-18 23:54:58 +00:00
}
}
}
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
# Make sure we're a subnode or DR host
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type ne "node") && ($host_type ne "dr"))
2021-04-23 04:04:20 +00:00
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 100, message => "error_0260"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
$anvil->nice_exit({exit_code => 1});
}
2021-05-08 06:02:46 +00:00
# If no stop-reason was set, set it to 'user'
if (not $anvil->data->{switches}{'stop-reason'})
{
$anvil->data->{switches}{'stop-reason'} = "user";
2021-08-07 18:01:14 +00:00
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::stop-reason' => $anvil->data->{switches}{'stop-reason'},
}});
2021-05-08 06:02:46 +00:00
}
2021-04-23 04:04:20 +00:00
# Migrate or stop the servers, if any servers are running here.
process_servers($anvil);
# This waits on DRBD if we're SyncSource
wait_on_drbd($anvil);
2021-04-23 15:51:28 +00:00
# This stops pacemaker
2021-04-23 04:04:20 +00:00
stop_cluster($anvil);
2021-04-23 15:51:28 +00:00
# Are we powering off?
if ($anvil->data->{switches}{'power-off'})
{
# Yup
$anvil->Database->update_host_status({
debug => 2,
host_uuid => $anvil->Get->host_uuid,
host_status => "stopping",
});
2022-02-25 02:56:02 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0687", variables => { reason => "#!string!log_0692!#" }});
2021-04-23 15:51:28 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0325"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 100, message => "job_0325"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 15:51:28 +00:00
2021-08-07 18:01:14 +00:00
# Set the stop reason.
if ($anvil->data->{switches}{'stop-reason'})
{
if ($anvil->data->{switches}{'stop-reason'} eq "none")
{
$anvil->data->{switches}{'stop-reason'} = "";
}
my $variable_uuid = $anvil->Database->insert_or_update_variables({
variable_name => 'system::stop_reason',
variable_value => $anvil->data->{switches}{'stop-reason'},
variable_default => '',
variable_description => 'striker_0279',
variable_section => 'system',
variable_source_uuid => $anvil->Get->host_uuid(),
variable_source_table => 'hosts',
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { variable_uuid => $variable_uuid }});
}
2021-04-23 15:51:28 +00:00
my $shell_call = $anvil->data->{path}{exe}{systemctl}." poweroff";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
# Unlikely we're still alive, but 'poweroff' does return once enqueued, so...
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
else
{
# We're not shutting down, so we're done
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0326"});
$anvil->Job->update_progress({progress => 100, message => "job_0326"});
}
2021-04-18 23:54:58 +00:00
$anvil->nice_exit({exit_code => 0});
2021-04-23 15:51:28 +00:00
2021-04-18 23:54:58 +00:00
#############################################################################################################
# Functions #
#############################################################################################################
2021-04-23 04:04:20 +00:00
2021-04-23 15:51:28 +00:00
# This takes down or migrates VMs, then withdraws from the cluster.
sub stop_cluster
{
my ($anvil) = @_;
# We need to rename the server in the cluster, and we need both nodes up to do it.
my $pacemaker_stopped = 0;
my $waiting = 1;
while($waiting)
{
$waiting = 0;
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if ($problem)
{
# Cluster has stopped.
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :)
* Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively.
* Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf.
* Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state.
* Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested.
* Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times.
* Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan.
* Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting.
Signed-off-by: Digimer <digimer@alteeve.ca>
2021-05-01 02:58:01 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 5, message => "job_0313"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 15:51:28 +00:00
}
else
{
$waiting = 1;
if (not $pacemaker_stopped)
{
# Stop pacemaker now.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0323"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 70, message => "job_0323"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 15:51:28 +00:00
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :)
* Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively.
* Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf.
* Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state.
* Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested.
* Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times.
* Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan.
* Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting.
Signed-off-by: Digimer <digimer@alteeve.ca>
2021-05-01 02:58:01 +00:00
### NOTE: '--force' is needed or else sole-running nodes can't exit
### (complains about the loss of quorum)
2024-03-29 09:45:49 +00:00
my $shell_call = $anvil->data->{path}{exe}{pcs_direct}." cluster stop --force";
2021-04-23 15:51:28 +00:00
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
$pacemaker_stopped = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { pacemaker_stopped => $pacemaker_stopped }});
}
else
{
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :)
* Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively.
* Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf.
* Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state.
* Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested.
* Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times.
* Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan.
* Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting.
Signed-off-by: Digimer <digimer@alteeve.ca>
2021-05-01 02:58:01 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0324"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 80, message => "job_0324"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 15:51:28 +00:00
}
}
if ($waiting)
{
sleep 5;
}
}
return(0);
}
2021-04-23 04:04:20 +00:00
# This will migrate or stop
sub process_servers
{
my ($anvil) = @_;
2022-08-31 22:12:07 +00:00
# Use virsh to check for servers, in case pacemaker lies to us.
$anvil->Server->find();
my $progress = 10;
my $waiting = 1;
my $first_try = 0;
my $second_try = 0;
my $try_again = 0;
my $server_count = keys %{$anvil->data->{server}{location}};
my $progress_steps = $server_count ? int(35 / $server_count) : 70;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:server_count' => $server_count,
's2:progress_steps' => $progress_steps,
}});
2022-09-22 03:35:06 +00:00
2023-07-15 20:19:21 +00:00
# If we have one or more local servers, we need to know if both subnodes are in the node's cluster.
# If we're not, or the peer isn't, we can't migrate.
my $can_migrate = 1;
2022-09-22 03:35:06 +00:00
if ($server_count)
{
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
if ($anvil->Get->host_type() eq "dr")
2022-09-22 03:35:06 +00:00
{
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
# No pacemaker, only stop servers.
2022-09-22 03:35:06 +00:00
$can_migrate = 0;
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, key => "job_0470"});
2022-09-22 03:35:06 +00:00
}
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
else
2022-09-22 03:35:06 +00:00
{
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:problem' => $problem,
's2:cib::parsed::local::ready' => $anvil->data->{cib}{parsed}{'local'}{ready},
's3:cib::parsed::peer::ready' => $anvil->data->{cib}{parsed}{peer}{ready},
}});
if ($problem)
{
# We're not in the node's cluster, we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
elsif ((not $anvil->data->{cib}{parsed}{'local'}{ready}) or (not $anvil->data->{cib}{parsed}{peer}{ready}))
{
# One of the subnodes is not in the cluster, so we can't migrate.
$can_migrate = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { can_migrate => $can_migrate }});
}
2022-09-22 03:35:06 +00:00
}
}
if ($anvil->data->{switches}{'stop-servers'})
{
# Tell the user we're about to shut down servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0320"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 10, message => "job_0320"}) if $anvil->data->{switches}{'job-uuid'};
2022-09-22 03:35:06 +00:00
}
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
elsif ($can_migrate)
2022-09-22 03:35:06 +00:00
{
# Tell the user we're about to migrate servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0321"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 10, message => "job_0321"}) if $anvil->data->{switches}{'job-uuid'};
}
else
{
# We would have to stop the servers, and the user didn't tell us to do that, abort.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0372"});
$anvil->Job->update_progress({progress => 100, message => "error_0372"}) if $anvil->data->{switches}{'job-uuid'};
$anvil->nice_exit({exit_code => 1});
2022-09-22 03:35:06 +00:00
}
2021-04-23 04:04:20 +00:00
while ($waiting)
{
# Is the cluster up?
$waiting = 0;
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if ($problem)
{
2022-08-31 22:12:07 +00:00
# Nope. Are we stopping servers?
if ($anvil->data->{switches}{'stop-servers'})
{
# Yes, are any servers running (check virsh)
foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{server}{location}})
{
my $status = $anvil->data->{server}{location}{$server_name}{status};
my $host_name = $anvil->data->{server}{location}{$server_name}{host_name};
$progress += $progress_steps;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:server_name' => $server_name,
's2:status' => $status,
's3:host_name' => $host_name,
's4:progress' => $progress,
}});
if ($host_name eq $anvil->Get->host_name)
{
# Server is still running.
if (($status eq "running") && (not $first_try))
{
# It's running despite the cluster being own, stop it.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0419", variables => { server => $server_name }});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => $progress, message => "job_0419,!!server!".$server_name."!!"}) if $anvil->data->{switches}{'job-uuid'};
2022-08-31 22:12:07 +00:00
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server_name,
wait_time => 1,
});
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
elsif (($status eq "in shutdown") && ($try_again))
{
# Hit the power button again.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "job_0420", variables => { server => $server_name }});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => $progress, message => "job_0420,!!server!".$server_name."!!"}) if $anvil->data->{switches}{'job-uuid'};
2022-08-31 22:12:07 +00:00
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server_name,
wait_time => 1,
});
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
}
if ($waiting)
{
if (not $first_try)
{
$first_try = time;
$second_try = $first_try + 120;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
first_try => $first_try,
second_try => $second_try,
}});
}
elsif ($try_again)
{
$try_again = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { try_again => $try_again }});
}
elsif (($second_try) && (time > $second_try))
{
$try_again = 1;
$second_try = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
second_try => $second_try,
try_again => $try_again,
}});
}
}
}
2021-04-23 04:04:20 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0313"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 80, message => "job_0313"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
}
else
{
# Loop through the servers running here.
my $local_name = $anvil->data->{cib}{parsed}{'local'}{name};
my $peer_name = $anvil->data->{cib}{parsed}{peer}{name};
foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
{
my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status};
my $host_name = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name};
my $role = $anvil->data->{cib}{parsed}{data}{server}{$server}{role};
my $active = $anvil->data->{cib}{parsed}{data}{server}{$server}{active};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:server' => $server,
's2:status' => $status,
2022-08-31 22:12:07 +00:00
's3:host_name' => $host_name,
2021-04-23 04:04:20 +00:00
's4:role' => $role,
's5:active' => $active,
}});
2021-04-23 15:51:28 +00:00
next if lc($role) eq "stopped";
2021-04-23 04:04:20 +00:00
if (lc($role) eq "migrating")
{
# No matter what, if a server is migrating, we wait.
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0315", variables => { server => $server }});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 20, message => "job_0315,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
}
elsif ($host_name eq $local_name)
{
# Something is running here.
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
# This is ours. How shall we deal with it?
if ($anvil->data->{switches}{'stop-servers'})
{
# Have we tried to stop it already? If not, use pcs. If so,
# and if it's been more that 60 seconds, use virsh to try
# again.
if (not exists $anvil->data->{server_shutdown}{$server})
{
# Use PCS.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0316", variables => { server => $server }});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 20, message => "job_0316,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
$anvil->Cluster->shutdown_server({
debug => 2,
server => $server,
'wait' => 0,
});
$anvil->data->{server_shutdown}{$server}{pcs_called} = 1;
$anvil->data->{server_shutdown}{$server}{virsh_called} = 0;
$anvil->data->{server_shutdown}{$server}{call_virsh_at} = time + 120;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server_shutdown::${server}::pcs_called" => $anvil->data->{server_shutdown}{$server}{pcs_called},
"server_shutdown::${server}::virsh_called" => $anvil->data->{server_shutdown}{$server}{virsh_called},
"server_shutdown::${server}::call_virsh_at" => $anvil->data->{server_shutdown}{$server}{call_virsh_at},
}});
}
elsif ((not $anvil->data->{server_shutdown}{$server}{virsh_called}) && (time > $anvil->data->{server_shutdown}{$server}{call_virsh_at}))
{
# Use virsh
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0317", variables => { server => $server }});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 20, message => "job_0317,!!server!".$server."!!"}) if $anvil->data->{switches}{'job-uuid'};
The core logic is done!!!! Still need to finish end-points for the WebUI to hook into, but the core of M3 is complete! Many, many bugs are expected, of course. :)
* Created DRBD->check_if_syncsource() and ->check_if_synctarget() that return '1' if the target host is currently SyncSource or SyncTarget for any resource, respectively.
* Updated DRBD->update_global_common() to return the unified-format diff if any changes were made to global-common.conf.
* Created ScanCore->check_health() that returns the health score for a host. Created ->count_servers() that returns the number of servers on a host, how much RAM is used by those servers and, if available, the estimated migration time of the servers. Updated ->check_temperature() to set/clear/return the time that a host has been in a warning or critical temperature state.
* Finished ScanCore->post_scan_analysis_node()!!! It certainly has bugs, and much testing is needed, but the logic is all in place! Oh what a slog that was... It should be far more intelligent than M2 though, once flushed out and tested.
* Created Server->active_migrations() that returns '1' if any servers are in a migration on an Anvil! system. Updated ->migrate_virsh() to record how long a migration took in the "server::migration_duration" variable, which is averaged by ScanCore->count_servers() to estimate migration times.
* Updated scan-drbd to check/update the global-common.conf file's config at the end of a scan.
* Updated ScanCore itself to not scan when in maintenance mode. Also updated it to call 'anvil-safe-start' when ScanCore starts, so long as it is within ten minutes of the host booting.
Signed-off-by: Digimer <digimer@alteeve.ca>
2021-05-01 02:58:01 +00:00
$anvil->Server->shutdown_virsh({
debug => 2,
server => $server,
wait_time => 1,
2021-04-23 04:04:20 +00:00
});
$anvil->data->{server_shutdown}{$server}{virsh_called} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"server_shutdown::${server}::virsh_called" => $anvil->data->{server_shutdown}{$server}{virsh_called},
}});
}
}
else
{
### TODO: Calculate how many gigs worth of RAM we'll migrate,
### and advance the "progress" by the percentage each
### server's RAM represents of the total
# Migrate the servers.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0318", variables => {
server => $server,
node => $peer_name,
}});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 20, message => "job_0318,!!server!".$server."!!,!!node!".$peer_name."!!"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
$anvil->Cluster->migrate_server({
server => $server,
node => $peer_name,
'wait' => 1,
});
}
}
}
}
if ($waiting)
{
sleep 5;
}
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0319"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 30, message => "job_0319"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
return(0);
}
# This watches DRBD and waits for us to not be SyncSource.
sub wait_on_drbd
{
my ($anvil) = @_;
2021-04-23 15:51:28 +00:00
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0322"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 40, message => "job_0322"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
my $short_host_name = $anvil->Get->short_host_name();
my $waiting = 1;
while ($waiting)
{
# (Re)fresh my view of the storage.
$waiting = 0;
$anvil->DRBD->get_status({debug => 2});
# Now check to see if anything is sync'ing.
foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}})
{
foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_name => $peer_name }});
foreach my $volume (sort {$a cmp $b} %{$anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}{$peer_name}{volume}})
{
next if not exists $anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}{$peer_name}{volume}{$volume}{'replication-state'};
my $replication_state = $anvil->data->{drbd}{status}{$short_host_name}{resource}{$server_name}{connection}{$peer_name}{volume}{$volume}{'replication-state'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
volume => $volume,
replication_state => $replication_state,
}});
if ($replication_state =~ /SyncSource/i)
{
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0312", variables => {
peer_host => $peer_name,
resource => $server_name,
volume => $volume,
}});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 50, message => "job_0312,!!peer_host!".$peer_name."!!,!!resource!".$server_name."!!,!!volume!".$volume."!!"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
}
}
}
}
if ($waiting)
{
sleep 10;
}
}
# All servers should be down now, so stop DRBD.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0314"});
Major thing in this commit is reworking striker-update-cluster to work without expecting anvil-daemon to be running on target machines. Similarly, they had to be able to work when the Striker DBs were not available. This is to account for cases where the Striker dashboards have updated, and the schema has changed, preventing the not-yet-updated DR hosts and subnodes from being able to use the DB. To do this, anvil-safe-stop, anvil-update-system, and anvil-shutdown-server had to be updated to use the new --no-db switch, which tells then to run without the database being available.
* Updated Server->shutdown_virsh() to work without a database connection.
* Updated System->reboot_needed() to store/read from a cache file when the database is not available.
* Updated anvil-safe-start to remove the old --enable/disable/status switches, now that we use anvil-safe-start.service systemd unit.
* Reworked anvil-safe-stop to work without a database connection, and to work on DR hosts.
* Updated anvil-special-operations to add new tasks, but it's likely these new tasks aren't needed and will be removed very shortly.
* Added/updated multiple man pages.
Signed-off-by: digimer <mkelly@alteeve.ca>
2023-07-22 22:09:01 +00:00
$anvil->Job->update_progress({progress => 60, message => "job_0314"}) if $anvil->data->{switches}{'job-uuid'};
2021-04-23 04:04:20 +00:00
my $shell_call = $anvil->data->{path}{exe}{drbdadm}." down all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
2021-04-23 15:51:28 +00:00
my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
2021-04-23 04:04:20 +00:00
return(0);
}