* Finished (though testing is needed) the updated ocf:alteeve:server resource agent. It now handles starting and stopping libvirtd and drbd daemons on-demand.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 5 years ago
parent dcd1fd1492
commit 01974d7efe
  1. 73
      ocf/alteeve/server
  2. 14
      share/words.xml
  3. 256
      tools/test.pl

@ -271,7 +271,7 @@ $anvil->nice_exit({exit_code => 255});
# This will either verify that 'libvirtd' and 'drbd' are running (and start them if not) when called with # This will either verify that 'libvirtd' and 'drbd' are running (and start them if not) when called with
# "start". If called with "stop", a check is made on both nodes. If all VMs are gone, "libvirtd" and "drbd" # "start". If called with "stop", a check is made on both nodes. If all VMs are gone, "libvirtd" and "drbd"
# are stopped. # are stopped.
sub check_services sub check_daemons
{ {
my ($anvil, $task) = @_; my ($anvil, $task) = @_;
@ -368,7 +368,10 @@ sub check_services
if ($return_code eq "3") if ($return_code eq "3")
{ {
# Stopped, start it.. # Stopped, start it..
print "Starting: [".$daemon."] on: [".$peer_name."]\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0486", variables => {
daemon => $daemon,
host => $peer_name,
}});
my ($output, $error, $return_code) = $anvil->Remote->call({ my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name, target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon, shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon,
@ -395,22 +398,34 @@ sub check_services
if ($return_code eq "0") if ($return_code eq "0")
{ {
$running = 1; $running = 1;
print "Verified start of: [".$daemon."] on: [".$peer_name."]\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0487", variables => {
daemon => $daemon,
host => $peer_name,
}});
} }
else else
{ {
$loops++; $loops++;
if ($loops > 3) if ($loops > 5)
{ {
### TODO: We may want to NOT die here, if
### we're booting a server (though we
### will if we're migrating).
# Give up # Give up
print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0135", variables => {
die; daemon => $daemon,
host => $peer_name,
}});
$anvil->nice_exit({exit_code => 1});
} }
else else
{ {
# Wait for a second. # Wait for a second.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0488", variables => {
daemon => $daemon,
host => $peer_name,
}});
sleep 1; sleep 1;
print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n";
} }
} }
} }
@ -418,7 +433,10 @@ sub check_services
elsif ($return_code eq "0") elsif ($return_code eq "0")
{ {
# Running, nothing to do. # Running, nothing to do.
print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0485", variables => {
daemon => $daemon,
host => $peer_name,
}});
} }
} }
} }
@ -484,7 +502,16 @@ sub check_services
}}); }});
if ((not $local_vm_count) && (not $remote_vm_count)) if ((not $local_vm_count) && (not $remote_vm_count))
{ {
print "No servers running on either node, stopping daemons.\n"; if ($peer_ready)
{
# No servers running on either node.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0490"});
}
else
{
# No servers running here and the peer is not in the cluster.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0491"});
}
foreach my $daemon ("libvirtd.service", "drbd.service") foreach my $daemon ("libvirtd.service", "drbd.service")
{ {
my $running_local = 0; my $running_local = 0;
@ -498,12 +525,12 @@ sub check_services
if ($local_return_code eq "3") if ($local_return_code eq "3")
{ {
# Already stopped. # Already stopped.
print "The daemon: [".$daemon."] is already stopped locally.\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0492", variables => { daemon => $daemon }});
} }
elsif ($local_return_code eq "0") elsif ($local_return_code eq "0")
{ {
# Running, stop it. # Running, stop it.
print "Stopping: [".$daemon."] locally\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0493", variables => { daemon => $daemon }});
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon}); my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output, output => $output,
@ -523,12 +550,18 @@ sub check_services
if ($remote_return_code eq "3") if ($remote_return_code eq "3")
{ {
# Already stopped. # Already stopped.
print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0494", variables => {
daemon => $daemon,
host => $peer_name,
}});
} }
elsif ($remote_return_code eq "0") elsif ($remote_return_code eq "0")
{ {
# Running, stop it. # Running, stop it.
print "Stopping: [".$daemon."] on: [".$peer_name."]\n"; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0495", variables => {
daemon => $daemon,
host => $peer_name,
}});
my ($output, $error, $return_code) = $anvil->Remote->call({ my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name, target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon, shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon,
@ -541,9 +574,13 @@ sub check_services
} }
} }
} }
else
{
# Servers are still running, don't stop the daemons.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0496"});
}
} }
return(0); return(0);
} }
@ -576,7 +613,7 @@ sub start_server
my ($anvil) = @_; my ($anvil) = @_;
# Before we do anything, make sure that 'libvirtd' and 'drbd' services are running. # Before we do anything, make sure that 'libvirtd' and 'drbd' services are running.
check_services($anvil, "start"); check_daemons($anvil, "start");
# Start procedure; # Start procedure;
# 1. Read the XML definition file and find the backing storage and bridges. Soft error if read fails. # 1. Read the XML definition file and find the backing storage and bridges. Soft error if read fails.
@ -903,6 +940,9 @@ sub stop_server
stop_drbd_resource($anvil); stop_drbd_resource($anvil);
} }
# If this was the last running server, stop the daemons.
check_daemons($anvil, "stop");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0324", variables => { server => $server }});
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});
} }
@ -934,6 +974,9 @@ sub migrate_server
{ {
my ($anvil) = @_; my ($anvil) = @_;
# Before migrating, make sure the daemons are running on the peer.
check_daemons($anvil, "start");
### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a ### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a
### user might want to do this (ie: sync will be done soon and the need to evacuate the node ### user might want to do this (ie: sync will be done soon and the need to evacuate the node
### ASAP is high). Maybe we'll enforce this and require a '--force' switch later? ### ASAP is high). Maybe we'll enforce this and require a '--force' switch later?

@ -194,6 +194,7 @@ The error was:
<key name="error_0132">Failed to remove the symlink: [#!variable!symlink!#]!</key> <key name="error_0132">Failed to remove the symlink: [#!variable!symlink!#]!</key>
<key name="error_0133">Failed to read or parse the CIB! Is pacemaker running?</key> <key name="error_0133">Failed to read or parse the CIB! Is pacemaker running?</key>
<key name="error_0134">Failed to start the daemon: [#!variable!daemon!#] on the local system, unable to boot the server.</key> <key name="error_0134">Failed to start the daemon: [#!variable!daemon!#] on the local system, unable to boot the server.</key>
<key name="error_0135">Failed to start the daemon: [#!variable!daemon!#] on [#!variable!host!#], unable to boot the server.</key>
<!-- Table headers --> <!-- Table headers -->
<key name="header_0001">Current Network Interfaces and States</key> <key name="header_0001">Current Network Interfaces and States</key>
@ -712,7 +713,7 @@ We will keep looking.</key>
<key name="log_0299">We were asked to promote: [#!variable!server!#], which makes no sense and is not supported. Ignoring.</key> <key name="log_0299">We were asked to promote: [#!variable!server!#], which makes no sense and is not supported. Ignoring.</key>
<key name="log_0300">We were asked to demote: [#!variable!server!#], which makes no sense and is not supported. Ignoring.</key> <key name="log_0300">We were asked to demote: [#!variable!server!#], which makes no sense and is not supported. Ignoring.</key>
<key name="log_0301">We were asked to notify, but this is not a promotable (we're stateless) agent. Ignoring.</key> <key name="log_0301">We were asked to notify, but this is not a promotable (we're stateless) agent. Ignoring.</key>
<key name="log_0302">We were invoked with an unexpected (or no) command. Environment variables and arguments below.</key> <key name="log_0302">We were invoked with an unexpected (or no) command. Environment variables and arguments have been logged.</key>
<key name="log_0303">We've been asked to start the server: [#!variable!server!#].</key> <key name="log_0303">We've been asked to start the server: [#!variable!server!#].</key>
<key name="log_0304">It appears that the call to list the currently running servers returned a non-zero return code: [#!variable!return_code!#]. We will proceed as we may be able to fix this. The output, if any, was: [#!variable!output!#].</key> <key name="log_0304">It appears that the call to list the currently running servers returned a non-zero return code: [#!variable!return_code!#]. We will proceed as we may be able to fix this. The output, if any, was: [#!variable!output!#].</key>
<key name="log_0305">Sanity checks passed, ready to start: [#!variable!server!#].</key> <key name="log_0305">Sanity checks passed, ready to start: [#!variable!server!#].</key>
@ -917,6 +918,17 @@ If the targets are unique, did you copy the full database directory? A unique id
<key name="log_0483">Verifying that the daemon: [#!variable!daemon!#] has started.</key> <key name="log_0483">Verifying that the daemon: [#!variable!daemon!#] has started.</key>
<key name="log_0484">Waiting for the daemon: [#!variable!daemon!#] to start...</key> <key name="log_0484">Waiting for the daemon: [#!variable!daemon!#] to start...</key>
<key name="log_0485">The daemon: [#!variable!daemon!#] was already running locally, no need to start.</key> <key name="log_0485">The daemon: [#!variable!daemon!#] was already running locally, no need to start.</key>
<key name="log_0486">Starting the daemon: [#!variable!daemon!#] on: [#!variable!host!#].</key>
<key name="log_0487">Verifying that the daemon: [#!variable!daemon!#] has started on: [#!variable!host!#].</key>
<key name="log_0488">Waiting for the daemon: [#!variable!daemon!#] to start on: [#!variable!host!#]...</key>
<key name="log_0489">The daemon: [#!variable!daemon!#] was already running on: [#!variable!host!#], no need to start.</key>
<key name="log_0490">There are no servers running on either node, stopping daemons.</key>
<key name="log_0491">There are no servers running locally and the peer is not in the cluster, stopping daemons.</key>
<key name="log_0492">The daemon: [#!variable!daemon!#] is already stopped locally, nothing to do.</key>
<key name="log_0493">Stopping the daemon: [#!variable!daemon!#] locally.</key>
<key name="log_0494">The daemon: [#!variable!daemon!#] is already stopped on: [#!variable!host!#], nothing to do.</key>
<key name="log_0495">Stopping the daemon: [#!variable!daemon!#] on: [#!variable!host!#].</key>
<key name="log_0496">One or more servers are still running on the Anvil!, not stopping daemons.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -25,261 +25,5 @@ print "Connecting to the database(s);\n";
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"});
$anvil->data->{switches}{start} = "";
$anvil->data->{switches}{stop} = "";
$anvil->Get->switches; $anvil->Get->switches;
# Work out which node we are and which node is our peer. The host names fall back to "--" purely for
# display when the corresponding role is not set.
my $peer      = $anvil->Cluster->get_peers();
my $i_am      = $anvil->data->{sys}{anvil}{i_am};
my $peer_is   = $anvil->data->{sys}{anvil}{peer_is};
my $my_name   = $i_am    ? $anvil->data->{sys}{anvil}{$i_am}{host_name}    : "--";
my $peer_name = $peer_is ? $anvil->data->{sys}{anvil}{$peer_is}{host_name} : "--";
print "I am: .. [".$i_am."], my host name is: . [".$my_name."]\n";
print "Peer is: [".$peer_is."], peer host name is: [".$peer_name."]\n";
print "- Returned peer: [".$peer."]\n";

# When no peer is known, '$peer_name' is only the "--" placeholder, so remote calls are skipped
# instead of being sent to a bogus target.
my $have_peer = $peer_is ? 1 : 0;

# The daemons managed on demand, in the order they are started and stopped.
my @daemons = ("libvirtd.service", "drbd.service");

if ($anvil->data->{switches}{start})
{
	# Make sure each daemon is running locally and, if we have one, on the peer.
	foreach my $daemon (@daemons)
	{
		start_daemon_on($anvil, $daemon, "");
		start_daemon_on($anvil, $daemon, $peer_name) if $have_peer;
	}
}
elsif ($anvil->data->{switches}{stop})
{
	# Only stop the daemons if no server is running on either node.
	my $local_vm_count  = count_servers_on($anvil, "");
	my $remote_vm_count = $have_peer ? count_servers_on($anvil, $peer_name) : 0;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		local_vm_count  => $local_vm_count,
		remote_vm_count => $remote_vm_count,
	}});
	
	if ((not $local_vm_count) && (not $remote_vm_count))
	{
		print "No servers running on either node, stopping daemons.\n";
		foreach my $daemon (@daemons)
		{
			stop_daemon_on($anvil, $daemon, "");
			stop_daemon_on($anvil, $daemon, $peer_name) if $have_peer;
		}
	}
}

# Runs a shell call locally (when 'target' is an empty string) or on 'target' via Remote->call, logs
# the result at debug level and returns (output, return_code). An undefined return code is returned
# as an empty string so callers can compare it with 'eq' safely.
sub call_on
{
	my ($anvil, $target, $shell_call) = @_;
	
	my ($output, $error, $return_code) = ("", "", "");
	if ($target)
	{
		($output, $error, $return_code) = $anvil->Remote->call({
			target     => $target,
			shell_call => $shell_call,
		});
	}
	else
	{
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
	}
	$return_code = "" if not defined $return_code;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		target      => $target,
		shell_call  => $shell_call,
		output      => $output,
		error       => $error,
		return_code => $return_code,
	}});
	
	return($output, $return_code);
}

# Checks 'daemon' with 'systemctl status' and starts it if it is stopped (return code 3), then polls
# the status until it reports running (return code 0). Dies with a message if the daemon is still not
# running after the fourth failed check. An empty 'target' means the local machine.
sub start_daemon_on
{
	my ($anvil, $daemon, $target) = @_;
	
	my $systemctl = $anvil->data->{path}{exe}{systemctl};
	my $at        = $target ? " on: [".$target."]" : "";
	my $where     = $target ? $at                  : " locally";
	
	my ($status_output, $status_code) = call_on($anvil, $target, $systemctl." status ".$daemon);
	if ($status_code eq "0")
	{
		# Running, nothing to do.
		print "The daemon: [".$daemon."] is already running".$where.".\n";
		return(0);
	}
	if ($status_code ne "3")
	{
		# Neither running nor cleanly stopped; report it rather than silently ignoring it.
		print "[ Warning ] - Unexpected status return code: [".$status_code."] for: [".$daemon."]".$where.", not starting it.\n";
		return(1);
	}
	
	# Stopped, start it..
	print "Starting: [".$daemon."]".$where."\n";
	call_on($anvil, $target, $systemctl." start ".$daemon);
	
	my $loops = 0;
	while (1)
	{
		my ($output, $return_code) = call_on($anvil, $target, $systemctl." status ".$daemon);
		if ($return_code eq "0")
		{
			print "Verified start of: [".$daemon."]".$at."\n";
			last;
		}
		
		$loops++;
		if ($loops > 3)
		{
			# Give up, and say why instead of a bare 'die'.
			print "[ Error ] - Start of: [".$daemon."]".$at." appears to have failed!\n";
			die "Gave up waiting for: [".$daemon."] to start".$at.", last status return code: [".$return_code."].\n";
		}
		
		# Say we're waiting, then wait for a second.
		print "Waiting for: [".$daemon."] to start".$at."...\n";
		sleep 1;
	}
	
	return(0);
}

# Checks 'daemon' with 'systemctl status' and stops it if it is running (return code 0). An empty
# 'target' means the local machine.
sub stop_daemon_on
{
	my ($anvil, $daemon, $target) = @_;
	
	my $systemctl = $anvil->data->{path}{exe}{systemctl};
	my $where     = $target ? " on: [".$target."]" : " locally";
	
	my ($status_output, $status_code) = call_on($anvil, $target, $systemctl." status ".$daemon);
	if ($status_code eq "3")
	{
		# Already stopped.
		print "The daemon: [".$daemon."] is already stopped".$where.".\n";
	}
	elsif ($status_code eq "0")
	{
		# Running, stop it.
		print "Stopping: [".$daemon."]".$where."\n";
		call_on($anvil, $target, $systemctl." stop ".$daemon);
	}
	else
	{
		print "[ Warning ] - Unexpected status return code: [".$status_code."] for: [".$daemon."]".$where.", not stopping it.\n";
	}
	
	return(0);
}

# Returns the number of lines in 'virsh list --all' that show a server in the 'running' state. If the
# call returns a non-zero return code, the output is not parsed and 0 is returned. An empty 'target'
# means the local machine.
sub count_servers_on
{
	my ($anvil, $target) = @_;
	
	my $vm_count = 0;
	my ($output, $return_code) = call_on($anvil, $target, $anvil->data->{path}{exe}{virsh}." list --all");
	if (not $return_code)
	{
		# Parse output
		foreach my $line (split/\n/, $output)
		{
			$line = $anvil->Words->clean_spaces({ string => $line });
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
			
			if ($line =~ /\d+\s+.*?\s+running/)
			{
				$vm_count++;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { vm_count => $vm_count }});
			}
		}
	}
	
	return($vm_count);
}

Loading…
Cancel
Save