* Created DRBD->manage_resource() that allows for up/down/primary/secondary'ing a resource on a local or remote system.

* Created Server->boot() that starts a server and verifies that it did actually start.
* Got ocf:anvil:server smarter about starting DRBD resources, properly handling resources where auto-promote isn't enabled. The 'start' process is now complete (barring bugs).

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 5 years ago
parent b1ddf945e2
commit d224be9344
  1. 131
      Anvil/Tools/DRBD.pm
  2. 115
      Anvil/Tools/Server.pm
  3. 126
      ocf/alteeve/server
  4. 5
      share/words.xml

@ -14,6 +14,7 @@ my $THIS_FILE = "DRBD.pm";
### Methods;
# get_devices
# get_status
# manage_resource
=pod
@ -174,6 +175,31 @@ sub get_devices
$anvil->data->{drbd}{config}{$host}{peer} = "";
$anvil->data->{drbd}{config}{$host}{nodes} = {};
foreach my $hash_ref (@{$dump_xml->{common}->[0]->{section}})
{
my $name = $hash_ref->{name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { name => $name }});
if ($name eq "options")
{
foreach my $option_ref (@{$hash_ref->{option}})
{
my $variable = $option_ref->{name};
my $value = $option_ref->{value};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
variable => $variable,
value => $variable,
}});
if ($variable eq "auto-promote")
{
$anvil->data->{drbd}{config}{$host}{'auto-promote'} = $value =~ /^y/i ? 1 : 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"drbd::config::${host}::auto-promote" => $anvil->data->{drbd}{config}{$host}{'auto-promote'},
}});
}
}
}
}
foreach my $hash_ref (@{$dump_xml->{resource}})
{
my $this_resource = $hash_ref->{name};
@ -364,7 +390,7 @@ sub get_status
# Is this a local call or a remote call?
my $shell_call = $anvil->data->{path}{exe}{drbdsetup}." status --json";
my $output = "";
my $host = $anvil->_short_hostname({debug => $debug});
my $host = $anvil->_short_hostname();
if (($target) && ($target ne "local") && ($target ne $anvil->_hostname) && ($target ne $anvil->_short_hostname))
{
# Clear the hash where we'll store the data.
@ -565,6 +591,109 @@ sub get_status
return(0);
}
=head2 manage_resource

This runs C<< drbdadm <task> <resource> >>, either on the local machine or, when C<< target >> names another machine, on that machine.

This returns the return code from the C<< drbdadm >> call. If C<< 255 >> is returned, then we did not get the actual return code from C<< drbdadm >>. If C<< resource >> or C<< task >> is not set, an error is logged and C<< 1 >> is returned without calling C<< drbdadm >>.

B<NOTE>: This just makes the call, it doesn't wait or watch for the action to actually finish.

Parameters;

=head3 password (optional)

This is the password to use when connecting to a remote machine. If not set, but C<< target >> is, an attempt to connect without a password will be made.

=head3 port (optional)

This is the TCP port to use when connecting to a remote machine. If not set, but C<< target >> is, C<< 22 >> will be used.

=head3 remote_user (optional, default 'root')

If C<< target >> is set, this will be the user we connect to the remote machine as.

=head3 resource (required)

This is the name of the resource being acted upon.

=head3 task (required)

This is the action to take. Valid tasks are: C<< up >>, C<< down >>, C<< primary >>, and C<< secondary >>.

=head3 target (optional, default 'local')

This is the IP or host name of the machine to run C<< drbdadm >> on. If this is not set, is C<< local >>, or matches this machine's host name or short host name, the call is made on the local system.

=cut
sub manage_resource
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	
	# Start from the defaults, then take any value the caller actually defined.
	my %setting = (
		debug       => 3,
		password    => "",
		port        => "",
		remote_user => "root",
		resource    => "",
		task        => "",
		target      => "local",
	);
	foreach my $name (keys %setting)
	{
		$setting{$name} = $parameter->{$name} if defined $parameter->{$name};
	}
	my ($debug, $password, $port, $remote_user, $resource, $task, $target) = @setting{qw(debug password port remote_user resource task target)};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		password    => $anvil->Log->secure ? $password : $anvil->Words->string({key => "log_0186"}),
		port        => $port, 
		remote_user => $remote_user, 
		resource    => $resource, 
		task        => $task, 
		target      => $target, 
	}});
	
	# Both 'resource' and 'task' are required; complain about the first one that is missing.
	foreach my $required (["resource", $resource], ["task", $task])
	{
		my ($name, $value) = @{$required};
		next if $value;
		
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "DRBD->manage_resource()", parameter => $name }});
		return(1);
	}
	
	### TODO: Sanity check the resource name and task requested.
	my $command     = join(" ", $anvil->data->{path}{exe}{drbdadm}, $task, $resource);
	my $output      = "";
	my $return_code = 255;	# Reported to the caller if the call doesn't give us a real return code.
	
	# The call is remote only when the target is set and is neither 'local' nor one of our own host names.
	my $is_remote = (($target) && ($target ne "local") && ($target ne $anvil->_hostname) && ($target ne $anvil->_short_hostname));
	if (not $is_remote)
	{
		# Local.
		($output, $return_code) = $anvil->System->call({shell_call => $command});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
			output      => $output,
			return_code => $return_code,
		}});
		
		return($return_code);
	}
	
	# Remote call.
	($output, my $error, $return_code) = $anvil->Remote->call({
		debug       => $debug, 
		shell_call  => $command, 
		target      => $target,
		port        => $port, 
		password    => $password,
		remote_user => $remote_user, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		error       => $error,
		output      => $output,
		return_code => $return_code,
	}});
	
	return($return_code);
}
# =head3
#
# Private Functions;

@ -12,6 +12,8 @@ our $VERSION = "3.0.0";
my $THIS_FILE = "Server.pm";
### Methods;
# boot
# find
# get_status
=pod
@ -73,6 +75,100 @@ sub parent
# Public methods #
#############################################################################################################
=head2 boot

This boots a server by calling C<< virsh create >> on the server's definition file (C<< <server>.xml >> in the shared definitions directory), either locally or on C<< target >>. It then calls C<< Server->find >> once a second, up to five times, and checks whether the server now has a host recorded.

If the server is seen running, C<< 1 >> is returned. Otherwise, including when C<< server >> is not set, C<< 0 >> is returned.

Parameters;

=head3 password (optional)

This is the password to use when connecting to a remote machine.

=head3 port (optional)

This is the TCP port to use when connecting to a remote machine.

=head3 remote_user (optional, default 'root')

If C<< target >> is set, this will be the user we connect to the remote machine as.

=head3 server (required)

This is the name of the server to boot. It is used to build the definition file name.

=head3 target (optional, default 'local')

This is the IP or host name of the machine to boot the server on. If this is not set, is C<< local >>, or matches this machine's host name or short host name, the server is booted on the local system.

=cut
sub boot
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	
	my $password    = defined $parameter->{password}    ? $parameter->{password}    : "";
	my $port        = defined $parameter->{port}        ? $parameter->{port}        : "";
	my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root";
	my $server      = defined $parameter->{server}      ? $parameter->{server}      : "";
	my $target      = defined $parameter->{target}      ? $parameter->{target}      : "local";
	my $success     = 0;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		password    => $anvil->Log->secure ? $password : $anvil->Words->string({key => "log_0186"}),
		port        => $port, 
		remote_user => $remote_user, 
		server      => $server, 
		target      => $target, 
	}});
	
	if (not $server)
	{
		# Report this method's own name, and return the (false) success flag. This method's return 
		# value is a success flag, so returning '1' here would read as "the server booted".
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->boot()", parameter => "server" }});
		return($success);
	}
	
	# Is this a local call or a remote call?
	my $shell_call  = $anvil->data->{path}{exe}{virsh}." create ".$anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml";
	my $output      = "";
	my $return_code = "";
	if (($target) && ($target ne "local") && ($target ne $anvil->_hostname) && ($target ne $anvil->_short_hostname))
	{
		# Remote call.
		($output, my $error, $return_code) = $anvil->Remote->call({
			debug       => $debug, 
			shell_call  => $shell_call, 
			target      => $target,
			port        => $port, 
			password    => $password,
			remote_user => $remote_user, 
		});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
			output      => $output,
			error       => $error,
			return_code => $return_code,
		}});
	}
	else
	{
		# Local.
		($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
			output      => $output,
			return_code => $return_code,
		}});
	}
	
	# Wait up to five seconds for the server to appear. The return code from 'virsh' is logged above but
	# not otherwise used; success is judged only by whether the server shows up.
	# NOTE(review): 'find' is called without 'target' or credentials, so it presumably only looks at the 
	#               local machine - confirm that a remote boot can be seen here.
	my $wait = 5;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 'wait' => $wait }});
	while($wait)
	{
		$anvil->Server->find({debug => $debug});
		if ((exists $anvil->data->{server}{location}{$server}) && ($anvil->data->{server}{location}{$server}{host}))
		{
			# Success! Zeroing 'wait' ends the loop without another sleep.
			$wait    = 0;
			$success = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				'wait'  => $wait,
				success => $success, 
			}});
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0421", variables => { 
				server => $server, 
				host   => $anvil->data->{server}{location}{$server}{host},
			}});
		}
		
		if ($wait)
		{
			# Not seen yet, use up one attempt and pause before looking again.
			$wait--;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 'wait' => $wait }});
			sleep 1;
		}
	}
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { success => $success }});
	return($success);
}
=head2 find
This will look on the local or a remote machine for the list of servers that are running.
@ -165,12 +261,12 @@ sub find
if ($line =~ /^\d+ (.*) (.*?)$/)
{
my $server_name = $1;
$anvil->data->{server}{location}{$server_name}{status} = $2;
$anvil->data->{server}{location}{$server_name}{host} = $host;
my $server = $1;
$anvil->data->{server}{location}{$server}{status} = $2;
$anvil->data->{server}{location}{$server}{host} = $host;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"server::location::${server_name}::status" => $anvil->data->{server}{location}{$server_name}{status},
"server::location::${server_name}::host" => $anvil->data->{server}{location}{$server_name}{host},
"server::location::${server}::status" => $anvil->data->{server}{location}{$server}{status},
"server::location::${server}::host" => $anvil->data->{server}{location}{$server}{host},
}});
}
}
@ -231,6 +327,7 @@ sub get_status
if (not $server)
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Server->get_status()", parameter => "server" }});
return(1);
}
if (exists $anvil->data->{server}{$server})
@ -765,14 +862,18 @@ sub _parse_definition
"server::${server}::${source}::device::${device}::target::${device_target}::driver::cache" => $anvil->data->{server}{$server}{$source}{device}{$device}{target}{$device_target}{driver}{cache},
}});
$anvil->data->{server}{$server}{device}{$device_path}{on_lv} = defined $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{on} ? $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{on} : "";
$anvil->data->{server}{$server}{device}{$device_path}{resource} = defined $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{resource} ? $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{resource} : "";
my $on_lv = defined $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{on} ? $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{on} : "";
my $resource = defined $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{resource} ? $anvil->data->{drbd}{config}{$host}{drbd_path}{$device_path}{resource} : "";
$anvil->data->{server}{$server}{device}{$device_path}{on_lv} = $on_lv;
$anvil->data->{server}{$server}{device}{$device_path}{resource} = $resource;
$anvil->data->{server}{$server}{device}{$device_path}{target} = $device_target;
$anvil->data->{server}{$server}{resource}{$resource} = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
host => $host,
"server::${server}::device::${device_path}::on_lv" => $anvil->data->{server}{$server}{device}{$device_path}{on_lv},
"server::${server}::device::${device_path}::resource" => $anvil->data->{server}{$server}{device}{$device_path}{resource},
"server::${server}::device::${device_path}::target" => $anvil->data->{server}{$server}{device}{$device_path}{target},
"server::${server}::resource::${resource}" => $anvil->data->{server}{$server}{resource}{$resource},
}});
# Keep a list of DRBD resources used by this server.

@ -317,19 +317,135 @@ sub start_server
# Is the server already running somewhere?
find_server($anvil);
#$anvil->Server->find({server => $server});
### TODO:
my $host = $anvil->_short_hostname;
print "I am: [".$anvil->data->{drbd}{config}{$host}{host}."]. my peer is: [".$anvil->data->{drbd}{config}{$host}{peer}."]\n";
# Start the resource
start_drbd_resource($anvil);
die;
# Still alive? Boot!
my ($success) = $anvil->Server->boot({debug => 2, server => $server});
if ($success)
{
# Success!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0309", variables => { server => $server }});
$anvil->nice_exit({exit_code => 0});
}
else
{
# WTF?
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0310", variables => {
server => $server,
'state' => defined $anvil->data->{server}{location}{$server}{host} ? $anvil->data->{server}{location}{$server}{host} : "#!string!unit_0003!#",
}});
$anvil->nice_exit({exit_code => 6});
}
# If we're still alive, then we didn't see the server in the list of running servers, which is really weird.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0311", variables => { server => $server }});
$anvil->nice_exit({exit_code => 1});
}
# This starts the drbd resource(s) for the requested server. 
# 
# For each DRBD resource recorded for the server (named by OCF_RESKEY_name), the resource is brought up 
# locally and on the peer, then we poll the DRBD status once a second until every volume is usable. After 
# that, if 'auto-promote' is not enabled in the DRBD config, each resource is promoted to primary locally.
# 
# Takes the Anvil::Tools handle as its only argument and always returns '0'. The return codes from the 
# 'manage_resource' calls are not checked, and there is no timeout on the wait.
sub start_drbd_resource
{
	my ($anvil) = @_;
	
	my $server = $anvil->data->{environment}{OCF_RESKEY_name};
	my $host   = $anvil->_short_hostname;
	my $peer   = $anvil->data->{drbd}{config}{$host}{peer};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		server => $server,
		host   => $host, 
		peer   => $peer, 
	}});
	
	# Start DRBD locally.
	foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$server}{resource}})
	{
		# NOTE(review): If no IP is recorded for the peer, 'manage_resource' is given an empty/undefined 
		#               target and will run the second 'up' locally instead - confirm this is acceptable.
		my $peer_ip = $anvil->data->{drbd}{config}{$host}{resource}{$resource}{connection}{$peer}{ip_address};
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0419", variables => { 
			server   => $server,
			peer     => $peer,
			peer_ip  => $peer_ip,
			resource => $resource,
		}});
		
		# Bring the local resource up
		$anvil->DRBD->manage_resource({
			resource => $resource, 
			task     => "up",
		});
		
		# Bring the peer's resource up.
		$anvil->DRBD->manage_resource({
			resource => $resource, 
			task     => "up",
			target   => $peer_ip,
		});
		
		# Now wait for it to be connected or UpToDate...
		my $waiting = 1;
		while($waiting)
		{
			$anvil->DRBD->get_status({debug => 3});
			
			# The status may not list the peer (or the resource) yet, so treat a missing state as 
			# an empty string instead of passing 'undef' to lc() and the regex below.
			my $connection_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer}{'connection-state'};
			   $connection_state = "" if not defined $connection_state;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				connection_state => $connection_state,
			}});
			
			# NOTE(review): If the status lists no volumes for this resource, the loop below never 
			#               runs and 'all_ready' stays '1', ending the wait - confirm this is intended.
			my $all_ready = 1;
			foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}})
			{
				my $disk_state        = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{devices}{volume}{$volume}{'disk-state'};
				my $replication_state = $anvil->data->{drbd}{status}{$host}{resource}{$resource}{connection}{$peer}{volume}{$volume}{'replication-state'};
				   $disk_state        = "" if not defined $disk_state;
				   $replication_state = "" if not defined $replication_state;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					disk_state        => $disk_state,
					replication_state => $replication_state, 
				}});
				
				# If this volume isn't UpToDate, isn't in a Sync* replication state, and the 
				# peer isn't connected, we need to keep waiting. Any one of those three being 
				# true is enough to consider the volume ready.
				if ((lc($disk_state) ne "uptodate") && ($replication_state !~ /^Sync/i) && (lc($connection_state) ne "connected"))
				{
					$all_ready = 0;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }});
				}
			}
			
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_ready => $all_ready }});
			if ($all_ready)
			{
				$waiting = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
			}
			
			if ($waiting)
			{
				sleep 1;
			}
		}
	}
	
	# If auto-promote isn't set, promote the resource.
	if (not $anvil->data->{drbd}{config}{$host}{'auto-promote'})
	{
		foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$server}{resource}})
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0420", variables => { 
				server   => $server,
				resource => $resource,
			}});
			
			# Make the local resource primary.
			$anvil->DRBD->manage_resource({
				resource => $resource, 
				task     => "primary",
			});
		}
	}
	
	return(0);
}
# This uses the DRBD information to find other peers and see if the server is running on them.
sub find_server
{

@ -736,6 +736,9 @@ Failed to promote the DRBD resource: [#!variable!resource!#] primary. Expected a
<key name="log_0416">The server: [#!variable!server!#] is already running. Exiting successfully.</key>
<key name="log_0417">The server: [#!variable!server!#] is already running on: [#!variable!host!#]. This appears to be a DR host, which is outside pacemaker. Exiting with OCF_ERR_CONFIGURED (6) to prevent pacemaker from trying to start the server on the other node.</key>
<key name="log_0418">The server: [#!variable!server!#] is already running on: [#!variable!host!#]. This appears to be our peer. Exiting with OCF_ERR_INSTALLED (5) to tell pacemaker to try to start it on the other node.</key>
<key name="log_0419">The server: [#!variable!server!#] needs the DRBD resource: [#!variable!resource!#]. Bringing it up locally and on the peer: [#!variable!peer!#] (via IP: #!variable!peer_ip!#).</key>
<key name="log_0420">DRBD's 'auto-promote' is disabled. Promoting the resource: [#!variable!resource!#].</key>
<key name="log_0421">The server: [#!variable!server!#] is now running on the host: [#!variable!host!#].</key>
<!-- Test words. Do NOT change unless you update 't/Words.t' or tests will needlessly fail. -->
<key name="t_0000">Test</key>
@ -1025,6 +1028,8 @@ Failed to generate an RSA public key for the user: [#!variable!user!#]. The outp
<!-- These are units, words and so on used when displaying information. -->
<key name="unit_0001">Yes</key>
<key name="unit_0002">No</key>
<key name="unit_0003">None</key>
<key name="unit_0004">Unknown</key>
<!-- TODO: Merge these into 'unit' -->
<!-- These are words and strings used by javascript/jQuery -->

Loading…
Cancel
Save