diff --git a/ocf/alteeve/server b/ocf/alteeve/server
index 995cc1f2..087dc97d 100755
--- a/ocf/alteeve/server
+++ b/ocf/alteeve/server
@@ -271,7 +271,7 @@ $anvil->nice_exit({exit_code => 255});
# This will either verify that 'libvirtd' and 'drbd' are running (and start them if not) is called with
# "start". If called with "stop", a check is made on both nodes. If all VMs are gone, "libvirtd" and "drbd"
# are stopped.
-sub check_services
+sub check_daemons
{
my ($anvil, $task) = @_;
@@ -368,7 +368,10 @@ sub check_services
if ($return_code eq "3")
{
# Stopped, start it..
- print "Starting: [".$daemon."] on: [".$peer_name."]\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0486", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon,
@@ -395,22 +398,34 @@ sub check_services
if ($return_code eq "0")
{
$running = 1;
- print "Verified start of: [".$daemon."] on: [".$peer_name."]\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0487", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
}
else
{
$loops++;
- if ($loops > 3)
+ if ($loops > 5)
{
+ ### TODO: We may want to NOT die here, if
+ ### we're booting a server (though we
+ ### will if we're migrating).
# Give up
- print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n";
- die;
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0135", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
+ $anvil->nice_exit({exit_code => 1});
}
else
{
# Wait for a second.
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0488", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
sleep 1;
- print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n";
}
}
}
@@ -418,7 +433,10 @@ sub check_services
elsif ($return_code eq "0")
{
# Running, nothing to do.
- print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0485", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
}
}
}
@@ -484,7 +502,16 @@ sub check_services
}});
if ((not $local_vm_count) && (not $remote_vm_count))
{
- print "No servers running on either node, stopping daemons.\n";
+ if ($peer_ready)
+ {
+ # No servers running on either node.
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0490"});
+ }
+ else
+ {
+ # No servers running here and the peer is not in the cluster.
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0491"});
+ }
foreach my $daemon ("libvirtd.service", "drbd.service")
{
my $running_local = 0;
@@ -498,12 +525,12 @@ sub check_services
if ($local_return_code eq "3")
{
# Already stopped.
- print "The daemon: [".$daemon."] is already stopped locally.\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0492", variables => { daemon => $daemon }});
}
elsif ($local_return_code eq "0")
{
# Running, stop it.
- print "Stopping: [".$daemon."] locally\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0493", variables => { daemon => $daemon }});
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
@@ -523,12 +550,18 @@ sub check_services
if ($remote_return_code eq "3")
{
# Already stopped.
- print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0494", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
}
elsif ($remote_return_code eq "0")
{
# Running, stop it.
- print "Stopping: [".$daemon."] on: [".$peer_name."]\n";
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0495", variables => {
+ daemon => $daemon,
+ host => $peer_name,
+ }});
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon,
@@ -541,9 +574,13 @@ sub check_services
}
}
}
+ else
+ {
+ # Servers are still running, don't stop the daemons.
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0496"});
+ }
}
-
return(0);
}
@@ -576,7 +613,7 @@ sub start_server
my ($anvil) = @_;
# Before we do anything, make sure that 'libvirtd' and 'drbd' services are running.
- check_services($anvil, "start");
+ check_daemons($anvil, "start");
# Start procedure;
# 1. Read the XML definition file and find the backing storage and bridges. Soft error if read fails.
@@ -903,6 +940,9 @@ sub stop_server
stop_drbd_resource($anvil);
}
+ # If this was the last running server, stop the daemons.
+ check_daemons($anvil, "stop");
+
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }});
$anvil->nice_exit({exit_code => 0});
}
@@ -934,6 +974,9 @@ sub migrate_server
{
my ($anvil) = @_;
+ # Before migrating, make sure the daemons are running on the peer.
+ check_daemons($anvil, "start");
+
### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a
### user might want to do this (ie: sync will be done soon and the need to evacuate the node
### ASAP is high). Maybe we'll enforce this and require a '--force' switch later?
diff --git a/share/words.xml b/share/words.xml
index b3b16c53..b37b9c88 100644
--- a/share/words.xml
+++ b/share/words.xml
@@ -194,6 +194,7 @@ The error was:
Failed to remove the symlink: [#!variable!symlink!#]!
Failed to read or parse the CIB! Is pacemaker running?
Failed to start the daemon: [#!variable!daemon!#] on the local system, unable to boot the server.
+ Failed to start the daemon: [#!variable!daemon!#] on [#!variable!host!#], unable to boot the server.
Current Network Interfaces and States
@@ -712,7 +713,7 @@ We will keep looking.
We were asked to promote: [#!variable!server!#], which makes no sense and is not supported. Ignoreing.
We were asked to demote: [#!variable!server!#], which makes no sense and is not supported. Ignoreing.
We were asked to notify, but this is not a promotable (we're stateless) agent. Ignoring.
- We were invoked with an unexpected (or no) command. Environment variables and arguments below.
+ We were invoked with an unexpected (or no) command. Environment variables and arguments have been logged.
We've been asked to start the server: [#!variable!server!#].
It appears that the list the currently running servers returned a non-zero return code: [#!variable!return_code!#]. We will proceed as we may be able to fix this. The output, if any, was: [#!variable!output!#].
Sanity checks passed, ready to start: [#!variable!server!#].
@@ -917,6 +918,17 @@ If the targets are unique, did you copy the full database directory? A unique id
Verifying that the daemon: [#!variable!daemon!#] has started.
Waiting for the daemon: [#!variable!daemon!#] to start...
The daemon: [#!variable!daemon!#] was already running locally, no need to start.
+ Starting the daemon: [#!variable!daemon!#] on: [#!variable!host!#].
+ Verifying that the daemon: [#!variable!daemon!#] has started on: [#!variable!host!#].
+ Waiting for the daemon: [#!variable!daemon!#] to start on: [#!variable!host!#]...
+ The daemon: [#!variable!daemon!#] was already running on: [#!variable!host!#], no need to start.
+ There are no servers running on either node, stopping daemons.
+ There are no servers running locally and the peer is not in the cluster, stopping daemons.
+ The daemon: [#!variable!daemon!#] is already stopped locally, nothing to do.
+ Stopping the daemon: [#!variable!daemon!#] locally.
+ The daemon: [#!variable!daemon!#] is already stopped on: [#!variable!host!#], nothing to do.
+ Stopping the daemon: [#!variable!daemon!#] on: [#!variable!host!#].
+ One or more servers are still running on the Anvil!, not stopping daemons.
The host name: [#!variable!target!#] does not resolve to an IP address.
diff --git a/tools/test.pl b/tools/test.pl
index 850c89b2..1aa140f9 100755
--- a/tools/test.pl
+++ b/tools/test.pl
@@ -25,261 +25,5 @@ print "Connecting to the database(s);\n";
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"});
-$anvil->data->{switches}{start} = "";
-$anvil->data->{switches}{stop} = "";
$anvil->Get->switches;
-my $peer = $anvil->Cluster->get_peers();
-my $i_am = $anvil->data->{sys}{anvil}{i_am};
-my $peer_is = $anvil->data->{sys}{anvil}{peer_is};
-my $my_name = $i_am ? $anvil->data->{sys}{anvil}{$i_am}{host_name} : "--";
-my $peer_name = $peer_is ? $anvil->data->{sys}{anvil}{$peer_is}{host_name} : "--";
-print "I am: .. [".$i_am."], my host name is: . [".$my_name."]\n";
-print "Peer is: [".$peer_is."], peer host name is: [".$peer_name."]\n";
-print "- Returned peer: [".$peer."]\n";
-
-if ($anvil->data->{switches}{start})
-{
- foreach my $daemon ("libvirtd.service", "drbd.service")
- {
- my $running_local = 0;
- my $running_peer = 0;
-
- my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon});
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- local_output => $local_output,
- local_return_code => $local_return_code,
- }});
- if ($local_return_code eq "3")
- {
- # Stopped, start it..
- print "Starting: [".$daemon."] locally\n";
- my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon});
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- output => $output,
- return_code => $return_code,
- }});
-
- my $loops = 0;
- my $running = 0;
- until ($running)
- {
- my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon});
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- output => $output,
- return_code => $return_code,
- }});
- if ($return_code eq "0")
- {
- $running = 1;
- print "Verified start of: [".$daemon."]\n";
- }
- else
- {
- $loops++;
- if ($loops > 3)
- {
- # Give up
- print "[ Error ] - Start of: [".$daemon."] appears to have failed!\n";
- die;
- }
- else
- {
- # Wait for a second.
- sleep 1;
- print "Waiting for: [".$daemon."] to start...\n";
- }
- }
- }
- }
- elsif ($local_return_code eq "0")
- {
- # Running, nothing to do.
- print "The daemon: [".$daemon."] is already running locally.\n";
- }
-
- my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
- target => $peer_name,
- shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon,
- });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- remote_output => $remote_output,
- remote_error => $remote_error,
- remote_return_code => $remote_return_code,
- }});
- if ($remote_return_code eq "3")
- {
- # Stopped, start it..
- print "Starting: [".$daemon."] on: [".$peer_name."]\n";
- my ($output, $error, $return_code) = $anvil->Remote->call({
- target => $peer_name,
- shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon,
- });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- output => $output,
- error => $error,
- return_code => $return_code,
- }});
-
- my $loops = 0;
- my $running = 0;
- until ($running)
- {
- my ($output, $error, $return_code) = $anvil->Remote->call({
- target => $peer_name,
- shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon,
- });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- output => $output,
- error => $error,
- return_code => $return_code,
- }});
- if ($return_code eq "0")
- {
- $running = 1;
- print "Verified start of: [".$daemon."] on: [".$peer_name."]\n";
- }
- else
- {
- $loops++;
- if ($loops > 3)
- {
- # Give up
- print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n";
- die;
- }
- else
- {
- # Wait for a second.
- sleep 1;
- print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n";
- }
- }
- }
- }
- elsif ($remote_return_code eq "0")
- {
- # Running, nothing to do.
- print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n";
- }
- }
-}
-elsif ($anvil->data->{switches}{stop})
-{
- my $stop = 0;
-
- # Check both nodes if a server is running on either node.
- my $local_vm_count = 0;
- my $remote_vm_count = 0;
-
- # Call virsh list --all
- my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list --all"});
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- local_output => $local_output,
- local_return_code => $local_return_code,
- }});
- if (not $local_return_code)
- {
- # Parse output
- foreach my $line (split/\n/, $local_output)
- {
- $line = $anvil->Words->clean_spaces({ string => $line });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
-
- if ($line =~ /(\d+)\s+(.*?)\s+running/)
- {
- $local_vm_count++;
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_vm_count => $local_vm_count }});
- }
- }
- }
-
- my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
- target => $peer_name,
- shell_call => $anvil->data->{path}{exe}{virsh}." list --all",
- });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- remote_output => $remote_output,
- remote_error => $remote_error,
- remote_return_code => $remote_return_code,
- }});
- if (not $remote_return_code)
- {
- # Parse output
- foreach my $line (split/\n/, $remote_output)
- {
- $line = $anvil->Words->clean_spaces({ string => $line });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
-
- if ($line =~ /(\d+)\s+(.*?)\s+running/)
- {
- $remote_vm_count++;
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remote_vm_count => $remote_vm_count }});
- }
- }
- }
-
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- local_vm_count => $local_vm_count,
- remote_vm_count => $remote_vm_count,
- }});
- if ((not $local_vm_count) && (not $remote_vm_count))
- {
- print "No servers running on either node, stopping daemons.\n";
- foreach my $daemon ("libvirtd.service", "drbd.service")
- {
- my $running_local = 0;
- my $running_peer = 0;
-
- my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon});
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- local_output => $local_output,
- local_return_code => $local_return_code,
- }});
- if ($local_return_code eq "3")
- {
- # Already stopped.
- print "The daemon: [".$daemon."] is already stopped locally.\n";
- }
- elsif ($local_return_code eq "0")
- {
- # Running, stop it.
- print "Stopping: [".$daemon."] locally\n";
- my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon});
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- output => $output,
- return_code => $return_code,
- }});
- }
-
- my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
- target => $peer_name,
- shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon,
- });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- remote_output => $remote_output,
- remote_error => $remote_error,
- remote_return_code => $remote_return_code,
- }});
- if ($remote_return_code eq "3")
- {
- # Already stopped.
- print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n";
- }
- elsif ($remote_return_code eq "0")
- {
- # Running, stop it.
- print "Stopping: [".$daemon."] on: [".$peer_name."]\n";
- my ($output, $error, $return_code) = $anvil->Remote->call({
- target => $peer_name,
- shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon,
- });
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
- output => $output,
- error => $error,
- return_code => $return_code,
- }});
- }
- }
- }
-}