* Created Cluster->check_node_status() that checks the status of a node (in pacemaker).

* Created Cluster->get_peers() that figures out who the peer node (and DR host, if applicable) are.
* Updated Cluster->parse_cib() to dig out more information.
* Created Cluster->start_cluster() to start pacemaker (via pcsd) locally or on all (both) nodes.
* Started working on ocf:alteeve:server to start/stop the libvirtd/drbd daemons as needed, instead of having pacemaker do it.
* Got more work done on anvil-join-anvil. Node 2 now waits for the cluster to start, and node 1 will do setup as needed, then wait for the cluster to start as well.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 5 years ago
parent 2692a4219e
commit dcd1fd1492
  1. 372
      Anvil/Tools/Cluster.pm
  2. 4
      Anvil/Tools/Remote.pm
  3. 285
      ocf/alteeve/server
  4. 15
      share/words.xml
  5. 187
      tools/anvil-join-anvil
  6. 265
      tools/test.pl

@ -14,7 +14,10 @@ our $VERSION = "3.0.0";
my $THIS_FILE = "Cluster.pm"; my $THIS_FILE = "Cluster.pm";
### Methods; ### Methods;
# check_node_status
# get_peers
# parse_cib # parse_cib
# start_cluster
=pod =pod
@ -74,6 +77,176 @@ sub parent
# Public methods # # Public methods #
############################################################################################################# #############################################################################################################
=head2 check_node_status

This takes a node name (generally the short host name) and, using the data from a C<< parse_cib >> call (made before calling this method), checks the node's ready state. If the node is ready, C<< 1 >> is returned. If not, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned.

If the node is not found in the parsed CIB data, its C<< in_ccm >>, C<< crmd >>, C<< join >> and C<< ready >> states are all recorded as C<< 0 >>, and so C<< 0 >> is returned.

Parameters;

=head3 node_name (required)

This is the node name as used when configured in the cluster. In most cases, this is the short host name.

=cut
sub check_node_status
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->check_node_status()" }});
	
	my $node_name = defined $parameter->{node_name} ? $parameter->{node_name} : "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
		node_name => $node_name, 
	}});
	
	if (not $node_name)
	{
		# Report this method and the parameter that is actually missing.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Cluster->check_node_status()", parameter => "node_name" }});
		return("!!error!!");
	}
	
	# A node that isn't in the parsed CIB is treated as fully down. The values are stored so that later 
	# readers of the 'cib::parsed::data::node' hash see a consistent set of states for this node.
	if (not exists $anvil->data->{cib}{parsed}{data}{node}{$node_name})
	{
		$anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm} = 0;
		$anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd}   = 0;
		$anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'} = 0;
		$anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}  = 0;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
			"cib::parsed::data::node::${node_name}::node_state::in_ccm" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm},
			"cib::parsed::data::node::${node_name}::node_state::crmd"   => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd},
			"cib::parsed::data::node::${node_name}::node_state::join"   => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'},
			"cib::parsed::data::node::${node_name}::node_state::ready"  => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready},
		}});
	}
	
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
		"cib::parsed::data::node::${node_name}::node_state::ready" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready},
	}});
	return($anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready});
}
=head2 get_peers

This method uses the local machine's host UUID and finds the host names of the cluster members. If this host is in a cluster and it is a node, the peer's short host name is returned. Otherwise (this host is a DR host, or it is not in any Anvil!), an empty string is returned.

The data is stored as;

 sys::anvil::node1::host_uuid
 sys::anvil::node1::host_name
 sys::anvil::node2::host_uuid
 sys::anvil::node2::host_name
 sys::anvil::dr1::host_uuid
 sys::anvil::dr1::host_name

To assist with lookup, the following are also set;

 sys::anvil::i_am    = {node1,node2,dr1}
 sys::anvil::peer_is = {node1,node2}	# Not set if this host is 'dr1'

All of the above are reset to empty strings each time this method is called, and stay empty if this host is not found in any Anvil!.

This method takes no parameters.

=cut
sub get_peers
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->get_peers()" }});
	
	# Start from a clean slate so that stale values from an earlier call can't leak through.
	$anvil->data->{sys}{anvil}{node1}{host_uuid} = "";
	$anvil->data->{sys}{anvil}{node1}{host_name} = "";
	$anvil->data->{sys}{anvil}{node2}{host_uuid} = "";
	$anvil->data->{sys}{anvil}{node2}{host_name} = "";
	$anvil->data->{sys}{anvil}{dr1}{host_uuid}   = "";
	$anvil->data->{sys}{anvil}{dr1}{host_name}   = "";
	$anvil->data->{sys}{anvil}{i_am}             = "";
	$anvil->data->{sys}{anvil}{peer_is}          = "";
	
	# Load hosts and anvils
	$anvil->Database->get_hosts({debug => $debug});
	$anvil->Database->get_anvils({debug => $debug});
	
	# Is this host in an anvil?
	my $host_uuid = $anvil->Get->host_uuid({debug => $debug});
	my $found     = 0;
	my $peer      = "";
	foreach my $anvil_uuid (keys %{$anvil->data->{anvils}{anvil_uuid}})
	{
		my $anvil_node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
		my $anvil_node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
		# The DR host is optional, so normalize an unset value to an empty string before comparing.
		my $anvil_dr1_host_uuid   = defined $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid} ? $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid} : "";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
			anvil_node1_host_uuid => $anvil_node1_host_uuid, 
			anvil_node2_host_uuid => $anvil_node2_host_uuid, 
			anvil_dr1_host_uuid   => $anvil_dr1_host_uuid, 
		}});
		
		if ($host_uuid eq $anvil_node1_host_uuid)
		{
			# Found our Anvil!, and we're node 1.
			$found                               = 1;
			$anvil->data->{sys}{anvil}{i_am}    = "node1";
			$anvil->data->{sys}{anvil}{peer_is} = "node2";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
				found                 => $found, 
				"sys::anvil::i_am"    => $anvil->data->{sys}{anvil}{i_am},
				"sys::anvil::peer_is" => $anvil->data->{sys}{anvil}{peer_is},
			}});
		}
		elsif ($host_uuid eq $anvil_node2_host_uuid)
		{
			# Found our Anvil!, and we're node 2.
			$found                               = 1;
			$anvil->data->{sys}{anvil}{i_am}    = "node2";
			$anvil->data->{sys}{anvil}{peer_is} = "node1";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
				found                 => $found, 
				"sys::anvil::i_am"    => $anvil->data->{sys}{anvil}{i_am},
				"sys::anvil::peer_is" => $anvil->data->{sys}{anvil}{peer_is},
			}});
		}
		elsif (($anvil_dr1_host_uuid) && ($host_uuid eq $anvil_dr1_host_uuid))
		{
			# Found our Anvil!, and we're the DR host. There is no peer in this case, so 
			# 'peer_is' is left empty.
			$found                            = 1;
			$anvil->data->{sys}{anvil}{i_am} = "dr1";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
				found              => $found, 
				"sys::anvil::i_am" => $anvil->data->{sys}{anvil}{i_am},
			}});
		}
		
		if ($found)
		{
			$anvil->data->{sys}{anvil}{node1}{host_uuid} = $anvil_node1_host_uuid;
			$anvil->data->{sys}{anvil}{node1}{host_name} = $anvil->data->{hosts}{host_uuid}{$anvil_node1_host_uuid}{host_name};
			$anvil->data->{sys}{anvil}{node2}{host_uuid} = $anvil_node2_host_uuid;
			$anvil->data->{sys}{anvil}{node2}{host_name} = $anvil->data->{hosts}{host_uuid}{$anvil_node2_host_uuid}{host_name};
			$anvil->data->{sys}{anvil}{dr1}{host_uuid}   = $anvil_dr1_host_uuid ? $anvil_dr1_host_uuid : "";
			$anvil->data->{sys}{anvil}{dr1}{host_name}   = $anvil_dr1_host_uuid ? $anvil->data->{hosts}{host_uuid}{$anvil_dr1_host_uuid}{host_name} : "";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
				"sys::anvil::node1::host_uuid" => $anvil->data->{sys}{anvil}{node1}{host_uuid},
				"sys::anvil::node1::host_name" => $anvil->data->{sys}{anvil}{node1}{host_name},
				"sys::anvil::node2::host_uuid" => $anvil->data->{sys}{anvil}{node2}{host_uuid},
				"sys::anvil::node2::host_name" => $anvil->data->{sys}{anvil}{node2}{host_name},
				"sys::anvil::dr1::host_uuid"   => $anvil->data->{sys}{anvil}{dr1}{host_uuid},
				"sys::anvil::dr1::host_name"   => $anvil->data->{sys}{anvil}{dr1}{host_name},
			}});
			
			# If this is a node, return the peer's short host name. 'peer_is' names the *other* 
			# node, so it is used as the lookup key (it is empty on a DR host).
			my $peer_is = $anvil->data->{sys}{anvil}{peer_is};
			if ($peer_is)
			{
				$peer =  $anvil->data->{sys}{anvil}{$peer_is}{host_name};
				$peer =  "" if not defined $peer;
				$peer =~ s/\..*//;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { peer => $peer }});
			}
			
			# A host is only ever looked up in the first Anvil! it matches.
			last;
		}
	}
	
	return($peer);
}
=head2 parse_cib =head2 parse_cib
This reads in the CIB XML and parses it. On success, it returns C<< 0 >>. On failure (ie: pcsd isn't running), returns C<< 1 >>. This reads in the CIB XML and parses it. On success, it returns C<< 0 >>. On failure (ie: pcsd isn't running), returns C<< 1 >>.
@ -92,6 +265,11 @@ sub parse_cib
{ {
delete $anvil->data->{cib}{parsed}; delete $anvil->data->{cib}{parsed};
} }
# This stores select data we've pulled out that's meant to be easier to find.
if (exists $anvil->data->{cib}{data})
{
delete $anvil->data->{cib}{data};
}
my $problem = 1; my $problem = 1;
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib"; my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib";
@ -119,9 +297,44 @@ sub parse_cib
} }
else else
{ {
### NOTE: Full CIB details;
### - https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html-single/Pacemaker_Explained/index.html
# Successful parse! # Successful parse!
$problem = 0; $problem = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
foreach my $nvpair ($dom->findnodes('/cib/configuration/crm_config/cluster_property_set/nvpair'))
{
my $nvpair_id = $nvpair->{id};
foreach my $variable (sort {$a cmp $b} keys %{$nvpair})
{
next if $variable eq "id";
$anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable} = $nvpair->{$variable};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::crm_config::cluster_property_set::nvpair::${nvpair_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable},
}});
}
}
foreach my $node ($dom->findnodes('/cib/configuration/nodes/node'))
{
my $node_id = $node->{id};
foreach my $variable (sort {$a cmp $b} keys %{$node})
{
next if $variable eq "id";
$anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable} = $node->{$variable};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::nodes::${node_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable},
}});
if ($variable eq "uname")
{
my $node = $node->{$variable};
$anvil->data->{cib}{parsed}{data}{node}{$node}{id} = $node_id;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::data::node::${node}::id" => $anvil->data->{cib}{parsed}{data}{node}{$node}{id},
}});
}
}
}
foreach my $clone ($dom->findnodes('/cib/configuration/resources/clone')) foreach my $clone ($dom->findnodes('/cib/configuration/resources/clone'))
{ {
my $clone_id = $clone->{id}; my $clone_id = $clone->{id};
@ -164,6 +377,57 @@ sub parse_cib
} }
} }
} }
### TODO: /cib/configuration/constraints
foreach my $node_state ($dom->findnodes('/cib/status/node_state'))
{
my $id = $node_state->{id};
foreach my $variable (sort {$a cmp $b} keys %{$node_state})
{
next if $variable eq "id";
$anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable} = $node_state->{$variable};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::cib::node_state::${id}::${variable}" => $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable},
}});
}
foreach my $lrm ($node_state->findnodes('./lrm'))
{
my $lrm_id = $lrm->{id};
foreach my $lrm_resource ($lrm->findnodes('./lrm_resources/lrm_resource'))
{
my $lrm_resource_id = $lrm_resource->{id};
$anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{type} = $lrm_resource->{type};
$anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{class} = $lrm_resource->{class};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::cib::status::node_state::${id}::lrm_id::${lrm_id}::lrm_resource::${lrm_resource_id}::type" => $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{type},
"cib::parsed::cib::status::node_state::${id}::lrm_id::${lrm_id}::lrm_resource::${lrm_resource_id}::class" => $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{class},
}});
foreach my $lrm_rsc_op ($lrm_resource->findnodes('./lrm_rsc_op'))
{
my $lrm_rsc_op_id = $lrm_rsc_op->{id};
foreach my $variable (sort {$a cmp $b} keys %{$lrm_rsc_op})
{
next if $variable eq "id";
$anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}{$lrm_rsc_op_id}{$variable} = $lrm_rsc_op->{$variable};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::cib::status::node_state::${id}::lrm_id::${lrm_id}::lrm_resource::${lrm_resource_id}::lrm_rsc_op_id::${lrm_rsc_op_id}::${variable}" => $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}{$lrm_rsc_op_id}{$variable},
}});
}
}
}
}
foreach my $transient_attributes ($node_state->findnodes('./transient_attributes'))
{
# Currently, there seems to be no other data stored here.
my $transient_attributes_id = $transient_attributes->{id};
foreach my $instance_attributes ($transient_attributes->findnodes('./instance_attributes'))
{
$anvil->data->{cib}{parsed}{cib}{node_state}{$id}{transient_attributes_id}{$transient_attributes_id}{instance_attributes_id} = $instance_attributes->{id};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::cib::status::node_state::${id}::transient_attributes_id::${transient_attributes_id}::instance_attributes_id" => $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{transient_attributes_id}{$transient_attributes_id}{instance_attributes_id},
}});
}
}
}
foreach my $primitive ($dom->findnodes('/cib/configuration/resources/primitive')) foreach my $primitive ($dom->findnodes('/cib/configuration/resources/primitive'))
{ {
my $id = $primitive->{id}; my $id = $primitive->{id};
@ -208,49 +472,97 @@ sub parse_cib
}}); }});
} }
} }
foreach my $nvpair ($dom->findnodes('/cib/configuration/crm_config/cluster_property_set/nvpair'))
{
my $nvpair_id = $nvpair->{id};
foreach my $variable (sort {$a cmp $b} keys %{$nvpair})
{
next if $variable eq "id";
$anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable} = $nvpair->{$variable};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::crm_config::cluster_property_set::nvpair::${nvpair_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable},
}});
} }
} }
foreach my $node ($dom->findnodes('/cib/configuration/nodes/node'))
{ # Pull some data out for easier access.
my $node_id = $node->{id}; $anvil->data->{cib}{parsed}{peer}{ready} = "";
foreach my $variable (sort {$a cmp $b} keys %{$node}) $anvil->data->{cib}{parsed}{peer}{name} = "";
foreach my $node_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{node}})
{ {
next if $variable eq "id"; # The "coming up" order is 'in_ccm' then 'crmd' then 'join'.
$anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable} = $node->{$variable}; my $node_id = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{id};
my $in_ccm = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{in_ccm} eq "true" ? 1 : 0; # 'true' or 'false' - Corosync member
my $crmd = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{crmd} eq "online" ? 1 : 0; # 'online' or 'offline' - In corosync process group
my $join = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{'join'} eq "member" ? 1 : 0; # 'member' or 'down' - Completed controller join process
my $ready = (($in_ccm) && ($crmd) && ($join)) ? 1 : 0; # Our summary of if the node is "up"
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::nodes::${node_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable}, 's1:node_name' => $node_name,
's2:node_id' => $node_id,
's3:in_ccm' => $in_ccm,
's4:crmd' => $crmd,
's5:join' => $join,
's6:ready' => $ready,
}}); }});
}
} $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm} = $in_ccm;
foreach my $node_state ($dom->findnodes('/cib/status/node_state')) $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd} = $crmd;
{ $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'} = $join;
my $id = $node_state->{id}; $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready} = $ready;
foreach my $variable (sort {$a cmp $b} keys %{$node_state}) $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::data::node::${node_name}::node_state::in_ccm" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm},
"cib::parsed::data::node::${node_name}::node_state::crmd" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd},
"cib::parsed::data::node::${node_name}::node_state::join" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'},
"cib::parsed::data::node::${node_name}::node_state::ready" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready},
}});
# Is this me or the peer?
if (($node_name ne $anvil->_host_name) && ($node_name ne $anvil->_short_host_name))
{ {
next if $variable eq "id"; # It's our peer.
$anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable} = $node_state->{$variable}; $anvil->data->{cib}{parsed}{peer}{ready} = $ready;
$anvil->data->{cib}{parsed}{peer}{name} = $node_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::cib::node_state::${id}::${variable}" => $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable}, "cib::parsed::peer::ready" => $anvil->data->{cib}{parsed}{peer}{ready},
"cib::parsed::peer::name" => $anvil->data->{cib}{parsed}{peer}{name},
}}); }});
} }
} }
die;
} return($problem);
}
=head2 start_cluster

This will join the local node to the pacemaker cluster. Optionally, it can try to start the cluster on both nodes if C<< all >> is set.

The output and return code of the C<< pcs cluster start >> call are logged. Note that the return code is not evaluated; this method currently always returns C<< 1 >>.

Parameters;

=head3 all (optional, default '0')

If set, the cluster will be started on both (all) nodes.

=cut
sub start_cluster
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->start_cluster()" }});
	
	my $all = defined $parameter->{all} ? $parameter->{all} : 0;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
		all => $all, 
	}});
	
	my $success    = 1;
	my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start";
	if ($all)
	{
		# Ask pcs to start the cluster on every node, not just this one.
		$shell_call .= " --all";
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => {
		shell_call => $shell_call,
	}});
	
	my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => {
		output      => $output,
		return_code => $return_code, 
	}});
	
	return($success);
}
# =head3 # =head3

@ -664,9 +664,11 @@ sub call
my $clean_output = ""; my $clean_output = "";
foreach my $line (split/\n/, $output) foreach my $line (split/\n/, $output)
{ {
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { line => $line }});
if ($line =~ /^return_code:(\d+)$/) if ($line =~ /^return_code:(\d+)$/)
{ {
$return_code = $1; $return_code = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { return_code => $return_code }});
} }
elsif ($line =~ /return_code:(\d+)$/) elsif ($line =~ /return_code:(\d+)$/)
{ {
@ -677,7 +679,7 @@ sub call
$return_code = $1; $return_code = $1;
$line =~ s/return_code:\d+$//; $line =~ s/return_code:\d+$//;
$output .= $line."\n"; $output .= $line."\n";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
line => $line, line => $line,
output => $output, output => $output,
return_code => $return_code, return_code => $return_code,

@ -35,7 +35,7 @@
# - Pacemaker interprets this exit code as a soft error. # - Pacemaker interprets this exit code as a soft error.
# #
# 2 - OCF_ERR_ARGS # 2 - OCF_ERR_ARGS
# - The resource’s configuration is not valid on this machine. This can happen if the serve fails to boot # - The resource’s configuration is not valid on this machine. This can happen if the server fails to boot
# because of a missing bridge, for example. # because of a missing bridge, for example.
# #
# 3 - OCF_ERR_UNIMPLEMENTED # 3 - OCF_ERR_UNIMPLEMENTED
@ -268,6 +268,285 @@ $anvil->nice_exit({exit_code => 255});
# Functions # # Functions #
############################################################################################################# #############################################################################################################
# This makes sure the 'libvirtd' and 'drbd' daemons are in the state needed for the given task.
# 
# When called with "start", both daemons are checked locally and started if 'systemctl status' reports them
# stopped (return code '3'). If the parsed CIB shows the peer as ready, the same is done on the peer.
# 
# When called with "stop", 'virsh list --all' is run locally and on the peer. If no server is reported as 
# running on either machine, both daemons are stopped locally and on the peer.
# 
# Parameters are the Anvil::Tools handle and the task ("start" or "stop"). Returns '0'. The program exits with
# '1' (via nice_exit) if the CIB can't be parsed or if a daemon fails to start.
sub check_services
{
	my ($anvil, $task) = @_;
	
	my $problem = $anvil->Cluster->parse_cib();
	if ($problem)
	{
		# Pacemaker isn't running, or some other problem. Someone must have called this script 
		# directly or something.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0133"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Is the peer running? We'll use this to know whether to try and start daemons on the peer.
	my $peer_name  = $anvil->Cluster->get_peers();
	my $peer_ready = $anvil->data->{cib}{parsed}{peer}{ready};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		peer_name  => $peer_name,
		peer_ready => $peer_ready, 
	}});
	
	my @daemons   = ("libvirtd.service", "drbd.service");
	my $systemctl = $anvil->data->{path}{exe}{systemctl};
	
	if ($task eq "start")
	{
		foreach my $daemon (@daemons)
		{
			### Local machine
			my ($status_output, $status_code) = $anvil->System->call({shell_call => $systemctl." status ".$daemon});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $status_output,
				return_code => $status_code, 
			}});
			if ($status_code eq "3")
			{
				# It is stopped, start it..
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0482", variables => { daemon => $daemon }});
				my ($start_output, $start_code) = $anvil->System->call({shell_call => $systemctl." start ".$daemon});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					output      => $start_output,
					return_code => $start_code, 
				}});
				
				# Poll until the daemon reports as running, giving up after a few tries.
				my $loops   = 0;
				my $running = 0;
				until ($running)
				{
					my ($check_output, $check_code) = $anvil->System->call({shell_call => $systemctl." status ".$daemon});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						output      => $check_output,
						return_code => $check_code, 
					}});
					if ($check_code eq "0")
					{
						# It's running
						$running = 1;
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0483", variables => { daemon => $daemon }});
					}
					else
					{
						$loops++;
						if ($loops > 5)
						{
							# Give up
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0134", variables => { daemon => $daemon }});
							$anvil->nice_exit({exit_code => 1});
						}
						else
						{
							# Wait for a second.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0484", variables => { daemon => $daemon }});
							sleep 1;
						}
					}
				}
			}
			elsif ($status_code eq "0")
			{
				# Running, nothing to do.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0485", variables => { daemon => $daemon }});
			}
			
			### Peer
			### TODO: The peer side still reports with plain 'print' calls instead of log keys.
			next if not $peer_ready;
			
			my ($peer_output, $peer_error, $peer_code) = $anvil->Remote->call({
				target     => $peer_name, 
				shell_call => $systemctl." status ".$daemon,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $peer_output,
				error       => $peer_error,
				return_code => $peer_code, 
			}});
			if ($peer_code eq "3")
			{
				# Stopped, start it..
				print "Starting: [".$daemon."] on: [".$peer_name."]\n";
				my ($start_output, $start_error, $start_code) = $anvil->Remote->call({
					target     => $peer_name, 
					shell_call => $systemctl." start ".$daemon,
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					output      => $start_output,
					error       => $start_error,
					return_code => $start_code, 
				}});
				
				my $loops   = 0;
				my $running = 0;
				until ($running)
				{
					my ($check_output, $check_error, $check_code) = $anvil->Remote->call({
						target     => $peer_name, 
						shell_call => $systemctl." status ".$daemon,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						output      => $check_output,
						error       => $check_error,
						return_code => $check_code, 
					}});
					if ($check_code eq "0")
					{
						$running = 1;
						print "Verified start of: [".$daemon."] on: [".$peer_name."]\n";
					}
					else
					{
						$loops++;
						if ($loops > 3)
						{
							# Give up. Exit the same way the local failure path does, 
							# rather than with a bare 'die'.
							print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n";
							$anvil->nice_exit({exit_code => 1});
						}
						else
						{
							# Wait for a second.
							sleep 1;
							print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n";
						}
					}
				}
			}
			elsif ($peer_code eq "0")
			{
				# Running, nothing to do.
				print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n";
			}
		}
	}
	
	if ($task eq "stop")
	{
		# Check both nodes if a server is running on either node.
		my $local_vm_count  = 0;
		my $remote_vm_count = 0;
		
		# Call virsh list --all
		my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list --all"});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			local_output      => $local_output,
			local_return_code => $local_return_code, 
		}});
		if (not $local_return_code)
		{
			$local_vm_count = _count_running_servers($anvil, $local_output, "local_vm_count");
		}
		
		# Without a peer name there is no target to call, so the peer is only checked when one was found.
		if ($peer_name)
		{
			my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
				target     => $peer_name, 
				shell_call => $anvil->data->{path}{exe}{virsh}." list --all",
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				remote_output      => $remote_output,
				remote_error       => $remote_error,
				remote_return_code => $remote_return_code, 
			}});
			if (not $remote_return_code)
			{
				$remote_vm_count = _count_running_servers($anvil, $remote_output, "remote_vm_count");
			}
		}
		
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			local_vm_count  => $local_vm_count,
			remote_vm_count => $remote_vm_count, 
		}});
		if ((not $local_vm_count) && (not $remote_vm_count))
		{
			print "No servers running on either node, stopping daemons.\n";
			foreach my $daemon (@daemons)
			{
				### Local machine
				my ($local_status_output, $local_status_code) = $anvil->System->call({shell_call => $systemctl." status ".$daemon});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					local_output      => $local_status_output,
					local_return_code => $local_status_code, 
				}});
				if ($local_status_code eq "3")
				{
					# Already stopped.
					print "The daemon: [".$daemon."] is already stopped locally.\n";
				}
				elsif ($local_status_code eq "0")
				{
					# Running, stop it.
					print "Stopping: [".$daemon."] locally\n";
					my ($output, $return_code) = $anvil->System->call({shell_call => $systemctl." stop ".$daemon});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						output      => $output,
						return_code => $return_code, 
					}});
				}
				
				### Peer
				next if not $peer_name;
				
				my ($remote_status_output, $remote_status_error, $remote_status_code) = $anvil->Remote->call({
					target     => $peer_name, 
					shell_call => $systemctl." status ".$daemon,
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					remote_output      => $remote_status_output,
					remote_error       => $remote_status_error,
					remote_return_code => $remote_status_code, 
				}});
				if ($remote_status_code eq "3")
				{
					# Already stopped.
					print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n";
				}
				elsif ($remote_status_code eq "0")
				{
					# Running, stop it.
					print "Stopping: [".$daemon."] on: [".$peer_name."]\n";
					my ($output, $error, $return_code) = $anvil->Remote->call({
						target     => $peer_name, 
						shell_call => $systemctl." stop ".$daemon,
					});
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						output      => $output,
						error       => $error,
						return_code => $return_code, 
					}});
				}
			}
		}
	}
	
	return(0);
}

# This counts the lines of 'virsh list --all' output that show a server as 'running'. It takes the 
# Anvil::Tools handle, the raw output and the variable name to use when logging the running count.
sub _count_running_servers
{
	my ($anvil, $virsh_output, $count_name) = @_;
	
	my $count = 0;
	return($count) if not defined $virsh_output;
	
	foreach my $line (split/\n/, $virsh_output)
	{
		$line = $anvil->Words->clean_spaces({ string => $line });
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
		
		# Matches a line holding a number, some text and then the word 'running'.
		if ($line =~ /\d+\s+.*?\s+running/)
		{
			$count++;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $count_name => $count }});
		}
	}
	
	return($count);
}
=cut =cut
STATES STATES
@ -296,6 +575,9 @@ sub start_server
{ {
my ($anvil) = @_; my ($anvil) = @_;
# Before we do anything, make sure that 'libvirtd' and 'drbd' services are running.
check_services($anvil, "start");
# Start procedure; # Start procedure;
# 1. Read the XML definition file and find the backing storage and bridges. Soft error if read fails. # 1. Read the XML definition file and find the backing storage and bridges. Soft error if read fails.
# 2. Make sure the name matches. # 2. Make sure the name matches.
@ -310,7 +592,6 @@ sub start_server
# 6.4. Make sure the backing device is 'Connected' or 'Connecting'. Call a connect if not. # 6.4. Make sure the backing device is 'Connected' or 'Connecting'. Call a connect if not.
# 7. Make sure all bridges exist and soft error if not. # 7. Make sure all bridges exist and soft error if not.
# 8. Start the server. # 8. Start the server.
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server = $anvil->data->{environment}{OCF_RESKEY_name};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0303", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0303", variables => { server => $server }});

@ -192,6 +192,8 @@ The error was:
<key name="error_0130">The answer: [#!variable!answer!#] is invalid. Please try again.</key> <key name="error_0130">The answer: [#!variable!answer!#] is invalid. Please try again.</key>
<key name="error_0131">The host UUID: [#!variable!host_uuid!#] was not found. Has it already been purged?</key> <key name="error_0131">The host UUID: [#!variable!host_uuid!#] was not found. Has it already been purged?</key>
<key name="error_0132">Failed to remove the symlink: [#!variable!symlink!#]!</key> <key name="error_0132">Failed to remove the symlink: [#!variable!symlink!#]!</key>
<key name="error_0133">Failed to read or parse the CIB! Is pacemaker running?</key>
<key name="error_0134">Failed to start the daemon: [#!variable!daemon!#] on the local system, unable to boot the server.</key>
<!-- Table headers --> <!-- Table headers -->
<key name="header_0001">Current Network Interfaces and States</key> <key name="header_0001">Current Network Interfaces and States</key>
@ -341,7 +343,14 @@ Failure! The return code: [#!variable!return_code!#] was received ('0' was expec
<key name="job_0099">No existing cluster found, will run initial setup.</key> <key name="job_0099">No existing cluster found, will run initial setup.</key>
<key name="job_0100">The corosync.conf file does not exist locally, but it does exist on the peer. Copying the file to here.</key> <key name="job_0100">The corosync.conf file does not exist locally, but it does exist on the peer. Copying the file to here.</key>
<key name="job_0101"><![CDATA[[ Error ] - Something went wrong while trying to initialize the cluster. The error, if any, was: [#!variable!error!#].]]></key> <key name="job_0101"><![CDATA[[ Error ] - Something went wrong while trying to initialize the cluster. The error, if any, was: [#!variable!error!#].]]></key>
<key name="job_0102">Starting the cluster now...</key> <key name="job_0102">Starting the cluster (on both nodes) now.</key>
<key name="job_0103">We're node 2, so we will wait until the peer starts the cluster.</key>
<key name="job_0104">Both nodes are up!</key>
<key name="job_0105">Still waiting. Node 1: [#!variable!node1_name!#] ready: [#!variable!node1_ready!#] (in_ccm/crmd/join: [#!variable!node1_in_ccm!#/#!variable!node1_crmd!#/#!variable!node1_join!#]), Node 2: [#!variable!node2_name!#] ready: [#!variable!node2_ready!#] (in_ccm/crmd/join: [#!variable!node2_in_ccm!#/#!variable!node2_crmd!#/#!variable!node2_join!#])</key>
<key name="job_0106">Cluster hasn't started, calling local start.</key>
<key name="job_0107">Corosync is not yet configured, waiting. It will be created when node 1 initializes the cluster.</key>
<key name="job_0108">Corosync is configured. Will wait for the cluster to start. If it hasn't started in two minutes, we'll try to join it.</key>
<key name="job_0109">We will now wait for the cluster to start.</key>
<!-- Log entries --> <!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key> <key name="log_0001">Starting: [#!variable!program!#].</key>
@ -904,6 +913,10 @@ If the targets are unique, did you copy the full database directory? A unique id
<key name="log_0479">Removing the symlink: [#!variable!symlink!#].</key> <key name="log_0479">Removing the symlink: [#!variable!symlink!#].</key>
<key name="log_0480">Updating the cache state file.</key> <key name="log_0480">Updating the cache state file.</key>
<key name="log_0481">[ Note ] - The host: [#!variable!host!#] entry in /etc/hosts has changed IP from: [#!variable!old_ip!#] to: [#!variable!new_ip!#].</key> <key name="log_0481">[ Note ] - The host: [#!variable!host!#] entry in /etc/hosts has changed IP from: [#!variable!old_ip!#] to: [#!variable!new_ip!#].</key>
<key name="log_0482">Starting the daemon: [#!variable!daemon!#] locally.</key>
<key name="log_0483">Verifying that the daemon: [#!variable!daemon!#] has started.</key>
<key name="log_0484">Waiting for the daemon: [#!variable!daemon!#] to start...</key>
<key name="log_0485">The daemon: [#!variable!daemon!#] was already running locally, no need to start.</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -33,14 +33,14 @@ $| = 1;
my $anvil = Anvil::Tools->new(); my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2}); $anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1}); $anvil->Log->secure({set => 1});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }});
# Read switches (target ([user@]host[:port]) and the file with the target's password. If the password is # Read switches (target ([user@]host[:port]) and the file with the target's password. If the password is
# passed directly, it will be used. Otherwise, the password will be read from the database. # passed directly, it will be used. Otherwise, the password will be read from the database.
$anvil->Get->switches; $anvil->Get->switches;
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})
{ {
# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try # No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try
@ -87,9 +87,6 @@ sub configure_pacemaker
my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name; my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name;
my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
my $escaped_password = shell_quote($new_password); my $escaped_password = shell_quote($new_password);
my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
machine => $machine, machine => $machine,
anvil_uuid => $anvil_uuid, anvil_uuid => $anvil_uuid,
@ -104,7 +101,6 @@ sub configure_pacemaker
peer_host_uuid => $peer_host_uuid, peer_host_uuid => $peer_host_uuid,
new_password => $anvil->Log->is_secure($new_password), new_password => $anvil->Log->is_secure($new_password),
escaped_password => $anvil->Log->is_secure($escaped_password), escaped_password => $anvil->Log->is_secure($escaped_password),
auth_shell_call => $anvil->Log->is_secure($auth_shell_call),
}}); }});
# If this is a DR box, we don't use pacemaker. # If this is a DR box, we don't use pacemaker.
@ -171,21 +167,96 @@ sub configure_pacemaker
} }
} }
### Run on node 1 only. # Node 1 initializes, node 2 waits.
if ($machine eq "node2") if ($machine eq "node2")
{ {
my $start_time = 0;
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0103");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0103"});
# We loop until the peer finishes or the peer's job hit's 100. # We loop until the peer finishes or the peer's job hit's 100.
my $tried_starting = 0;
my $both_online = 0;
until($both_online)
{
if (-e $anvil->data->{path}{configs}{'corosync.conf'})
{
if (not $start_time)
{
# Corosync is configured, we'll wait up to two minutes and then try
# joining the cluster ourselves.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0108"});
$start_time = time + 120;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }});
} }
else elsif ((time > $start_time) && (not $tried_starting))
{
# We've waited two minutes, time to try starting the cluster.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0106"});
my $cluster_started = $anvil->Cluster->start_cluster({debug => 2, all => 1});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_started => $cluster_started }});
# Mark that we've tried to start.
$tried_starting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tried_starting => $tried_starting }});
}
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if (not $problem)
{
# See if both nodes are online.
my $node1_ready = $anvil->Cluster->check_node_status({node_name => $node1_host_name});
my $node2_ready = $anvil->Cluster->check_node_status({node_name => $node2_host_name});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
node1_ready => $node1_ready,
node2_ready => $node2_ready,
}});
if (($node1_ready) && ($node2_ready))
{ {
# Proceed with cluster setup. $both_online = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { both_online => $both_online }});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0104");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0104"});
}
else
{
# Not online yet, wait a bit.
# Report each node's current state while we wait. Every variable is fed from the parsed CIB
# 'node_state' value of the same name, so that the 'job_0105' message shows each value under
# its matching label ("ready: [...] (in_ccm/crmd/join: [...])").
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0105", variables => {
	node1_name   => $node1_host_name,
	node1_ready  => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{ready},
	node1_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{in_ccm},
	node1_crmd   => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{crmd},
	node1_join   => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{'join'},
	node2_name   => $node2_host_name,
	node2_ready  => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{ready},
	node2_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{in_ccm},
	node2_crmd   => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{crmd},
	node2_join   => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{'join'},
}});
}
}
}
else
{
# corosync.conf doesn't exist yet.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0107"});
}
sleep 5 if not $both_online;
}
}
else
{
# We're node 1, proceed with cluster setup.
my $waiting = 1; my $waiting = 1;
my $warning_printed = 0; my $warning_printed = 0;
while($waiting) while($waiting)
{ {
# Try to authenticate against the peer.
my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password;
my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call}); my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output, output => $output,
return_code => $return_code, return_code => $return_code,
}}); }});
@ -221,10 +292,10 @@ sub configure_pacemaker
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0099", variables => { anvil_name => $anvil_name }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0099", variables => { anvil_name => $anvil_name }});
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster setup ".$anvil_name." ".$node1_host_name." ".$node2_host_name; my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster setup ".$anvil_name." ".$node1_host_name." ".$node2_host_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output, output => $output,
return_code => $return_code, return_code => $return_code,
}}); }});
@ -237,46 +308,64 @@ sub configure_pacemaker
} }
} }
# Now, if we can read the CIB, see where the setup is. If not, start by setting up the # If we can parse the CIB, then pcsd is running.
# cluster. my $problem = $anvil->Cluster->parse_cib({debug => 2});
my $cib_data = ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
my $cluster_started = 0; if ($problem)
until ($cib_data)
{
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }});
($cib_data, my $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => {
cib_data => $cib_data,
return_code => $return_code,
}});
if ($return_code)
{
if (not $cluster_started)
{ {
# Start the cluster. # Start the cluster.
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0102"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0102");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0102"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0102"});
$cluster_started = 1; my $cluster_started = $anvil->Cluster->start_cluster({debug => 2, all => 1});
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_started => $cluster_started }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { }
cluster_started => $cluster_started,
shell_call => $shell_call, # Now wait for both nodes to come online.
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0109");
my $both_online = 0;
until ($both_online)
{
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if (not $problem)
{
# See if both nodes are online.
my $node1_ready = $anvil->Cluster->check_node_status({node_name => $node1_host_name});
my $node2_ready = $anvil->Cluster->check_node_status({node_name => $node2_host_name});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
node1_ready => $node1_ready,
node2_ready => $node2_ready,
}}); }});
if (($node1_ready) && ($node2_ready))
{
$both_online = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { both_online => $both_online }});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0104");
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0104"});
output => $output, }
return_code => $return_code, else
{
# Not online yet, wait a bit.
# Report each node's current state while we wait. Every variable is fed from the parsed CIB
# 'node_state' value of the same name, so that the 'job_0105' message shows each value under
# its matching label ("ready: [...] (in_ccm/crmd/join: [...])").
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0105", variables => {
	node1_name   => $node1_host_name,
	node1_ready  => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{ready},
	node1_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{in_ccm},
	node1_crmd   => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{crmd},
	node1_join   => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{'join'},
	node2_name   => $node2_host_name,
	node2_ready  => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{ready},
	node2_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{in_ccm},
	node2_crmd   => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{crmd},
	node2_join   => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{'join'},
}});
} }
} }
die; sleep 5 if not $both_online;
} }
die;
} }
die;
=cut =cut
$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}; $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed};
$VAR1 = { $VAR1 = {
@ -957,14 +1046,14 @@ sub check_local_network
if ($restart_interface_count) if ($restart_interface_count)
{ {
# Disconnect from the database, as we're about to tear down our connection. # Disconnect from the database, as we're about to tear down our connection.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0079"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0079"});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0079"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0079");
$anvil->Database->disconnect(); $anvil->Database->disconnect();
# Tell nmcli to re-read the config files. # Tell nmcli to re-read the config files.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0463"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0463"});
my ($output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{nmcli}." connection reload"}); my ($output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{nmcli}." connection reload"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output, output => $output,
return_code => $return_code, return_code => $return_code,
}}); }});
@ -984,7 +1073,7 @@ sub check_local_network
{ {
$anvil->refresh(); $anvil->refresh();
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})
{ {
if (time > $wait_until) if (time > $wait_until)
@ -999,7 +1088,7 @@ sub check_local_network
} }
} }
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0084"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0084"});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0084"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0084");
} }
@ -1007,13 +1096,13 @@ sub check_local_network
if (exists $anvil->data->{network}{'local'}{interface}{virbr0}) if (exists $anvil->data->{network}{'local'}{interface}{virbr0})
{ {
# Remove the NAT'ed bridge # Remove the NAT'ed bridge
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0085"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0085"});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0085"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0085");
$anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{virsh}." net-destroy default"}); $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{virsh}." net-destroy default"});
$anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{virsh}." net-undefine default "}); $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{virsh}." net-undefine default "});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0034"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0034"});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0034"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0034");
} }
@ -1021,7 +1110,7 @@ sub check_local_network
$anvil->Network->read_nmcli({debug => 2}); $anvil->Network->read_nmcli({debug => 2});
$anvil->Network->get_ips({debug => 3}); $anvil->Network->get_ips({debug => 3});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0086"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0086"});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0086"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0086");
# Update MTUs (running interface and config) if needed. # Update MTUs (running interface and config) if needed.
@ -1102,7 +1191,7 @@ sub check_local_network
overwrite => 1, overwrite => 1,
}); });
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0034"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0034"});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0034"); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0034");
} }
} }
@ -1126,7 +1215,7 @@ sub check_local_network
my $old_config = $anvil->Storage->read_file({file => $anvil->data->{path}{data}{'chrony.conf'}}); my $old_config = $anvil->Storage->read_file({file => $anvil->data->{path}{data}{'chrony.conf'}});
foreach my $line (split/\n/, $old_config) foreach my $line (split/\n/, $old_config)
{ {
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { line => $line }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
$new_config .= $line."\n"; $new_config .= $line."\n";
if ($line =~ /^Server (.*)$/) if ($line =~ /^Server (.*)$/)
{ {

@ -21,8 +21,265 @@ my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2}); $anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1}); $anvil->Log->secure({set => 1});
# print "Connecting to the database(s);\n"; print "Connecting to the database(s);\n";
# $anvil->Database->connect(); $anvil->Database->connect();
# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"});
$anvil->Cluster->parse_cib({debug => 2}); $anvil->data->{switches}{start} = "";
$anvil->data->{switches}{stop} = "";
$anvil->Get->switches;
my $peer = $anvil->Cluster->get_peers();
my $i_am = $anvil->data->{sys}{anvil}{i_am};
my $peer_is = $anvil->data->{sys}{anvil}{peer_is};
my $my_name = $i_am ? $anvil->data->{sys}{anvil}{$i_am}{host_name} : "--";
my $peer_name = $peer_is ? $anvil->data->{sys}{anvil}{$peer_is}{host_name} : "--";
print "I am: .. [".$i_am."], my host name is: . [".$my_name."]\n";
print "Peer is: [".$peer_is."], peer host name is: [".$peer_name."]\n";
print "- Returned peer: [".$peer."]\n";
if ($anvil->data->{switches}{start})
{
foreach my $daemon ("libvirtd.service", "drbd.service")
{
my $running_local = 0;
my $running_peer = 0;
my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_output => $local_output,
local_return_code => $local_return_code,
}});
if ($local_return_code eq "3")
{
# Stopped, start it..
print "Starting: [".$daemon."] locally\n";
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
my $loops = 0;
my $running = 0;
until ($running)
{
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
if ($return_code eq "0")
{
$running = 1;
print "Verified start of: [".$daemon."]\n";
}
else
{
$loops++;
if ($loops > 3)
{
# Give up
print "[ Error ] - Start of: [".$daemon."] appears to have failed!\n";
die;
}
else
{
# Wait for a second.
sleep 1;
print "Waiting for: [".$daemon."] to start...\n";
}
}
}
}
elsif ($local_return_code eq "0")
{
# Running, nothing to do.
print "The daemon: [".$daemon."] is already running locally.\n";
}
my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
remote_output => $remote_output,
remote_error => $remote_error,
remote_return_code => $remote_return_code,
}});
if ($remote_return_code eq "3")
{
# Stopped, start it..
print "Starting: [".$daemon."] on: [".$peer_name."]\n";
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
my $loops = 0;
my $running = 0;
until ($running)
{
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
if ($return_code eq "0")
{
$running = 1;
print "Verified start of: [".$daemon."] on: [".$peer_name."]\n";
}
else
{
$loops++;
if ($loops > 3)
{
# Give up
print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n";
die;
}
else
{
# Wait for a second.
sleep 1;
print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n";
}
}
}
}
elsif ($remote_return_code eq "0")
{
# Running, nothing to do.
print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n";
}
}
}
elsif ($anvil->data->{switches}{stop})
{
my $stop = 0;
# Check both nodes if a server is running on either node.
my $local_vm_count = 0;
my $remote_vm_count = 0;
# Call virsh list --all
my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list --all"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_output => $local_output,
local_return_code => $local_return_code,
}});
if (not $local_return_code)
{
# Parse output
foreach my $line (split/\n/, $local_output)
{
$line = $anvil->Words->clean_spaces({ string => $line });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /(\d+)\s+(.*?)\s+running/)
{
$local_vm_count++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_vm_count => $local_vm_count }});
}
}
}
my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{virsh}." list --all",
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
remote_output => $remote_output,
remote_error => $remote_error,
remote_return_code => $remote_return_code,
}});
if (not $remote_return_code)
{
# Parse output
foreach my $line (split/\n/, $remote_output)
{
$line = $anvil->Words->clean_spaces({ string => $line });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }});
if ($line =~ /(\d+)\s+(.*?)\s+running/)
{
$remote_vm_count++;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remote_vm_count => $remote_vm_count }});
}
}
}
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_vm_count => $local_vm_count,
remote_vm_count => $remote_vm_count,
}});
if ((not $local_vm_count) && (not $remote_vm_count))
{
print "No servers running on either node, stopping daemons.\n";
foreach my $daemon ("libvirtd.service", "drbd.service")
{
my $running_local = 0;
my $running_peer = 0;
my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_output => $local_output,
local_return_code => $local_return_code,
}});
if ($local_return_code eq "3")
{
# Already stopped.
print "The daemon: [".$daemon."] is already stopped locally.\n";
}
elsif ($local_return_code eq "0")
{
# Running, stop it.
print "Stopping: [".$daemon."] locally\n";
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
return_code => $return_code,
}});
}
my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
remote_output => $remote_output,
remote_error => $remote_error,
remote_return_code => $remote_return_code,
}});
if ($remote_return_code eq "3")
{
# Already stopped.
print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n";
}
elsif ($remote_return_code eq "0")
{
# Running, stop it.
print "Stopping: [".$daemon."] on: [".$peer_name."]\n";
my ($output, $error, $return_code) = $anvil->Remote->call({
target => $peer_name,
shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
output => $output,
error => $error,
return_code => $return_code,
}});
}
}
}
}

Loading…
Cancel
Save