diff --git a/Anvil/Tools/Cluster.pm b/Anvil/Tools/Cluster.pm index 371100a6..ce1a77af 100644 --- a/Anvil/Tools/Cluster.pm +++ b/Anvil/Tools/Cluster.pm @@ -14,7 +14,10 @@ our $VERSION = "3.0.0"; my $THIS_FILE = "Cluster.pm"; ### Methods; +# check_node_status +# get_peers # parse_cib +# start_cluster =pod @@ -74,6 +77,176 @@ sub parent # Public methods # ############################################################################################################# +=head2 check_node_status + +This takes a node name (generally the short host name) and, using the data from a C<< parse_cib >> call (made before calling this method), the node's ready state will be checked. If the node is ready, C<< 1 >> is returned. If not, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned. + +Parameters; + +=head3 node_name (required) + +This is the node name as used when configured in the cluster. In most cases, this is the short host name. + +=cut +sub check_node_status +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->check_node_status()" }}); + + my $node_name = defined $parameter->{node_name} ? 
$parameter->{node_name} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + node_name => $node_name, + }}); + + if (not $node_name) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Cluster->check_node_status()", parameter => "node_name" }}); + return("!!error!!"); + } + + if (not exists $anvil->data->{cib}{parsed}{data}{node}{$node_name}) + { + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm} = 0; + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd} = 0; + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'} = 0; + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready} = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::data::node::${node_name}::node_state::in_ccm" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm}, + "cib::parsed::data::node::${node_name}::node_state::crmd" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd}, + "cib::parsed::data::node::${node_name}::node_state::join" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'}, + "cib::parsed::data::node::${node_name}::node_state::ready" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}, + }}); + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::data::node::${node_name}::node_state::ready" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}, + }}); + return($anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}); +} + +=head2 get_peers + +This method uses the local machine's host UUID and finds the host names of the cluster members. If this host is in a cluster and it is a node, the peer's short host name is returned. 
Otherwise, an empty string is returned. + +The data is stored as; + + sys::anvil::node1::host_uuid + sys::anvil::node1::host_name + sys::anvil::node2::host_uuid + sys::anvil::node2::host_name + sys::anvil::dr1::host_uuid + sys::anvil::dr1::host_name + +To assist with lookup, the following are also set; + + sys::anvil::i_am = {node1,node2,dr1} + sys::anvil::peer_is = {node1,node2} # Not set if this host is 'dr1' + +This method takes no parameters. + +=cut +sub get_peers +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->get_peers()" }}); + + $anvil->data->{sys}{anvil}{node1}{host_uuid} = ""; + $anvil->data->{sys}{anvil}{node1}{host_name} = ""; + $anvil->data->{sys}{anvil}{node2}{host_uuid} = ""; + $anvil->data->{sys}{anvil}{node2}{host_name} = ""; + $anvil->data->{sys}{anvil}{dr1}{host_uuid} = ""; + $anvil->data->{sys}{anvil}{dr1}{host_name} = ""; + $anvil->data->{sys}{anvil}{i_am} = ""; + $anvil->data->{sys}{anvil}{peer_is} = ""; + + # Load hosts and anvils + $anvil->Database->get_hosts({debug => $debug}); + $anvil->Database->get_anvils({debug => $debug}); + + # Is this host in an anvil? 
+ my $host_uuid = $anvil->Get->host_uuid({debug => $debug}); + my $in_anvil = ""; + my $found = 0; + my $peer = ""; + + foreach my $anvil_uuid (keys %{$anvil->data->{anvils}{anvil_uuid}}) + { + my $anvil_node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $anvil_node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $anvil_dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + anvil_node1_host_uuid => $anvil_node1_host_uuid, + anvil_node2_host_uuid => $anvil_node2_host_uuid, + anvil_dr1_host_uuid => $anvil_dr1_host_uuid, + }}); + + if ($host_uuid eq $anvil_node1_host_uuid) + { + # Found our Anvil!, and we're node 1. + $found = 1; + $anvil->data->{sys}{anvil}{i_am} = "node1"; + $anvil->data->{sys}{anvil}{peer_is} = "node2"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + found => $found, + "sys::anvil::i_am" => $anvil->data->{sys}{anvil}{i_am}, + "sys::anvil::peer_is" => $anvil->data->{sys}{anvil}{peer_is}, + }}); + } + elsif ($host_uuid eq $anvil_node2_host_uuid) + { + # Found our Anvil!, and we're node 2. + $found = 1; + $anvil->data->{sys}{anvil}{i_am} = "node2"; + $anvil->data->{sys}{anvil}{peer_is} = "node1"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + found => $found, + "sys::anvil::i_am" => $anvil->data->{sys}{anvil}{i_am}, + "sys::anvil::peer_is" => $anvil->data->{sys}{anvil}{peer_is}, + }}); + } + elsif ($host_uuid eq $anvil_dr1_host_uuid) + { + # Found our Anvil!, and we're the DR host (no peer is set in this case). 
+ $found = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { found => $found }}); + } + if ($found) + { + $anvil->data->{sys}{anvil}{node1}{host_uuid} = $anvil_node1_host_uuid; + $anvil->data->{sys}{anvil}{node1}{host_name} = $anvil->data->{hosts}{host_uuid}{$anvil_node1_host_uuid}{host_name}; + $anvil->data->{sys}{anvil}{node2}{host_uuid} = $anvil_node2_host_uuid; + $anvil->data->{sys}{anvil}{node2}{host_name} = $anvil->data->{hosts}{host_uuid}{$anvil_node2_host_uuid}{host_name}; + $anvil->data->{sys}{anvil}{dr1}{host_uuid} = $anvil_dr1_host_uuid ? $anvil_dr1_host_uuid : ""; + $anvil->data->{sys}{anvil}{dr1}{host_name} = $anvil_dr1_host_uuid ? $anvil->data->{hosts}{host_uuid}{$anvil_dr1_host_uuid}{host_name} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "sys::anvil::node1::host_uuid" => $anvil->data->{sys}{anvil}{node1}{host_uuid}, + "sys::anvil::node1::host_name" => $anvil->data->{sys}{anvil}{node1}{host_name}, + "sys::anvil::node2::host_uuid" => $anvil->data->{sys}{anvil}{node2}{host_uuid}, + "sys::anvil::node2::host_name" => $anvil->data->{sys}{anvil}{node2}{host_name}, + "sys::anvil::dr1::host_uuid" => $anvil->data->{sys}{anvil}{dr1}{host_uuid}, + "sys::anvil::dr1::host_name" => $anvil->data->{sys}{anvil}{dr1}{host_name}, + }}); + + # If this is a node, return the peer's short host name (node 2's name when we are node 1, and vice versa). + if ($anvil->data->{sys}{anvil}{i_am}) + { + $peer = $anvil->data->{sys}{anvil}{i_am} eq "node1" ? $anvil->data->{sys}{anvil}{node2}{host_name} : $anvil->data->{sys}{anvil}{node1}{host_name}; + $peer =~ s/\..*//; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { peer => $peer }}); + } + last; + } + } + + return($peer); +} + =head2 parse_cib This reads in the CIB XML and parses it. On success, it returns C<< 0 >>. On failure (ie: pcsd isn't running), returns C<< 1 >>. 
@@ -92,6 +265,11 @@ sub parse_cib { delete $anvil->data->{cib}{parsed}; } + # This stores select data we've pulled out that's meant to be easier to find. + if (exists $anvil->data->{cib}{data}) + { + delete $anvil->data->{cib}{data}; + } my $problem = 1; my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib"; @@ -119,9 +297,44 @@ sub parse_cib } else { + ### NOTE: Full CIB details; + ### - https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html-single/Pacemaker_Explained/index.html # Successful parse! $problem = 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }}); + foreach my $nvpair ($dom->findnodes('/cib/configuration/crm_config/cluster_property_set/nvpair')) + { + my $nvpair_id = $nvpair->{id}; + foreach my $variable (sort {$a cmp $b} keys %{$nvpair}) + { + next if $variable eq "id"; + $anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable} = $nvpair->{$variable}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::configuration::crm_config::cluster_property_set::nvpair::${nvpair_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable}, + }}); + } + } + foreach my $node ($dom->findnodes('/cib/configuration/nodes/node')) + { + my $node_id = $node->{id}; + foreach my $variable (sort {$a cmp $b} keys %{$node}) + { + next if $variable eq "id"; + $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable} = $node->{$variable}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::configuration::nodes::${node_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable}, + }}); + + if ($variable eq "uname") + { + my $node = $node->{$variable}; + $anvil->data->{cib}{parsed}{data}{node}{$node}{id} = $node_id; + 
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::data::node::${node}::id" => $anvil->data->{cib}{parsed}{data}{node}{$node}{id}, + }}); + } + } + } foreach my $clone ($dom->findnodes('/cib/configuration/resources/clone')) { my $clone_id = $clone->{id}; @@ -164,6 +377,57 @@ sub parse_cib } } } + ### TODO: /cib/configuration/constraints + foreach my $node_state ($dom->findnodes('/cib/status/node_state')) + { + my $id = $node_state->{id}; + foreach my $variable (sort {$a cmp $b} keys %{$node_state}) + { + next if $variable eq "id"; + $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable} = $node_state->{$variable}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::cib::node_state::${id}::${variable}" => $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable}, + }}); + } + foreach my $lrm ($node_state->findnodes('./lrm')) + { + my $lrm_id = $lrm->{id}; + foreach my $lrm_resource ($lrm->findnodes('./lrm_resources/lrm_resource')) + { + my $lrm_resource_id = $lrm_resource->{id}; + $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{type} = $lrm_resource->{type}; + $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{class} = $lrm_resource->{class}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::cib::status::node_state::${id}::lrm_id::${lrm_id}::lrm_resource::${lrm_resource_id}::type" => $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{type}, + "cib::parsed::cib::status::node_state::${id}::lrm_id::${lrm_id}::lrm_resource::${lrm_resource_id}::class" => $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{class}, + }}); + foreach my $lrm_rsc_op 
($lrm_resource->findnodes('./lrm_rsc_op')) + { + my $lrm_rsc_op_id = $lrm_rsc_op->{id}; + foreach my $variable (sort {$a cmp $b} keys %{$lrm_rsc_op}) + { + next if $variable eq "id"; + $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}{$lrm_rsc_op_id}{$variable} = $lrm_rsc_op->{$variable}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::cib::status::node_state::${id}::lrm_id::${lrm_id}::lrm_resource::${lrm_resource_id}::lrm_rsc_op_id::${lrm_rsc_op_id}::${variable}" => $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}{$lrm_rsc_op_id}{$variable}, + }}); + } + } + } + } + foreach my $transient_attributes ($node_state->findnodes('./transient_attributes')) + { + # Currently, there seems to be no other data stored here. + my $transient_attributes_id = $transient_attributes->{id}; + foreach my $instance_attributes ($transient_attributes->findnodes('./instance_attributes')) + { + $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{transient_attributes_id}{$transient_attributes_id}{instance_attributes_id} = $instance_attributes->{id}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::cib::status::node_state::${id}::transient_attributes_id::${transient_attributes_id}::instance_attributes_id" => $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{transient_attributes_id}{$transient_attributes_id}{instance_attributes_id}, + }}); + } + } + } foreach my $primitive ($dom->findnodes('/cib/configuration/resources/primitive')) { my $id = $primitive->{id}; @@ -208,51 +472,99 @@ sub parse_cib }}); } } - foreach my $nvpair ($dom->findnodes('/cib/configuration/crm_config/cluster_property_set/nvpair')) - { - my $nvpair_id = $nvpair->{id}; - foreach my $variable (sort {$a cmp $b} keys %{$nvpair}) - { - next if $variable eq "id"; - 
$anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable} = $nvpair->{$variable}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "cib::parsed::configuration::crm_config::cluster_property_set::nvpair::${nvpair_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{crm_config}{cluster_property_set}{nvpair}{$nvpair_id}{$variable}, - }}); - } - } - foreach my $node ($dom->findnodes('/cib/configuration/nodes/node')) - { - my $node_id = $node->{id}; - foreach my $variable (sort {$a cmp $b} keys %{$node}) - { - next if $variable eq "id"; - $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable} = $node->{$variable}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "cib::parsed::configuration::nodes::${node_id}::${variable}" => $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{$variable}, - }}); - } - } - foreach my $node_state ($dom->findnodes('/cib/status/node_state')) - { - my $id = $node_state->{id}; - foreach my $variable (sort {$a cmp $b} keys %{$node_state}) - { - next if $variable eq "id"; - $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable} = $node_state->{$variable}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "cib::parsed::cib::node_state::${id}::${variable}" => $anvil->data->{cib}{parsed}{cib}{node_state}{$id}{$variable}, - }}); - } - } - die; } } - #print Dumper $anvil->data->{cib}{parsed}; + # Pull some data out for easier access. + $anvil->data->{cib}{parsed}{peer}{ready} = ""; + $anvil->data->{cib}{parsed}{peer}{name} = ""; + foreach my $node_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{node}}) + { + # The "coming up" order is 'in_ccm' then 'crmd' then 'join'. 
+ my $node_id = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{id}; + my $in_ccm = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{in_ccm} eq "true" ? 1 : 0; # 'true' or 'false' - Corosync member + my $crmd = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{crmd} eq "online" ? 1 : 0; # 'online' or 'offline' - In corosync process group + my $join = $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{'join'} eq "member" ? 1 : 0; # 'member' or 'down' - Completed controller join process + my $ready = (($in_ccm) && ($crmd) && ($join)) ? 1 : 0; # Our summary of if the node is "up" + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:node_name' => $node_name, + 's2:node_id' => $node_id, + 's3:in_ccm' => $in_ccm, + 's4:crmd' => $crmd, + 's5:join' => $join, + 's6:ready' => $ready, + }}); + + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm} = $in_ccm; + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd} = $crmd; + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'} = $join; + $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready} = $ready; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::data::node::${node_name}::node_state::in_ccm" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{in_ccm}, + "cib::parsed::data::node::${node_name}::node_state::crmd" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{crmd}, + "cib::parsed::data::node::${node_name}::node_state::join" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{'join'}, + "cib::parsed::data::node::${node_name}::node_state::ready" => $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}, + }}); + + # Is this me or the peer? + if (($node_name ne $anvil->_host_name) && ($node_name ne $anvil->_short_host_name)) + { + # It's our peer. 
+ $anvil->data->{cib}{parsed}{peer}{ready} = $ready; + $anvil->data->{cib}{parsed}{peer}{name} = $node_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "cib::parsed::peer::ready" => $anvil->data->{cib}{parsed}{peer}{ready}, + "cib::parsed::peer::name" => $anvil->data->{cib}{parsed}{peer}{name}, + }}); + } + } return($problem); } +=head2 start_cluster + +This will join the local node to the pacemaker cluster. Optionally, it can try to start the cluster on both nodes if C<< all >> is set. This method currently always returns C<< 1 >>; the output and return code of the C<< pcs >> call are logged but not checked. + +Parameters; + +=head3 all (optional, default '0') + +If set, the cluster will be started on both (all) nodes. + +=cut +sub start_cluster +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->start_cluster()" }}); + + my $all = defined $parameter->{all} ? $parameter->{all} : 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + all => $all, + }}); + + my $success = 1; + my $shell_call = $anvil->data->{path}{exe}{pcs}." 
cluster start"; + if ($all) + { + $shell_call .= " --all"; + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { + shell_call => $shell_call, + }}); + + my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { + output => $output, + return_code => $return_code, + }}); + + return($success); +} + # =head3 # # Private Functions; diff --git a/Anvil/Tools/Remote.pm b/Anvil/Tools/Remote.pm index 27670c1f..18712957 100644 --- a/Anvil/Tools/Remote.pm +++ b/Anvil/Tools/Remote.pm @@ -664,9 +664,11 @@ sub call my $clean_output = ""; foreach my $line (split/\n/, $output) { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { line => $line }}); if ($line =~ /^return_code:(\d+)$/) { $return_code = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => $secure, list => { return_code => $return_code }}); } elsif ($line =~ /return_code:(\d+)$/) { @@ -677,7 +679,7 @@ sub call $return_code = $1; $line =~ s/return_code:\d+$//; $output .= $line."\n"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, secure => 0, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line, output => $output, return_code => $return_code, diff --git a/ocf/alteeve/server b/ocf/alteeve/server index 5601bf6b..995cc1f2 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -35,7 +35,7 @@ # - Pacemaker interprets this exit code as a soft error. # # 2 - OCF_ERR_ARGS -# - The resource’s configuration is not valid on this machine. This can happen if the serve fails to boot +# - The resource’s configuration is not valid on this machine. This can happen if the server fails to boot # because of a missing bridge, for example. 
# # 3 - OCF_ERR_UNIMPLEMENTED @@ -268,6 +268,285 @@ $anvil->nice_exit({exit_code => 255}); # Functions # ############################################################################################################# +# This will either verify that 'libvirtd' and 'drbd' are running (and start them if not) if called with +# "start". If called with "stop", a check is made on both nodes. If all VMs are gone, "libvirtd" and "drbd" +# are stopped. +sub check_services +{ + my ($anvil, $task) = @_; + + my $problem = $anvil->Cluster->parse_cib(); + if ($problem) + { + # Pacemaker isn't running, or some other problem. Someone must have called this script + # directly or something. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0133"}); + $anvil->nice_exit({exit_code => 1}); + } + + # Is the peer running? We'll use this to know whether to try and start daemons on the peer. + my $peer_name = $anvil->Cluster->get_peers(); + my $peer_ready = $anvil->data->{cib}{parsed}{peer}{ready}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peer_name => $peer_name, + peer_ready => $peer_ready, + }}); + + if ($task eq "start") + { + foreach my $daemon ("libvirtd.service", "drbd.service") + { + my $running_local = 0; + my $running_peer = 0; + + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code eq "3") + { + # It is stopped, start it.. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0482", variables => { daemon => $daemon }}); + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." 
start ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + + my $loops = 0; + my $running = 0; + until ($running) + { + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code eq "0") + { + # It's running + $running = 1; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0483", variables => { daemon => $daemon }}); + } + else + { + $loops++; + if ($loops > 5) + { + # Give up + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0134", variables => { daemon => $daemon }}); + $anvil->nice_exit({exit_code => 1}); + } + else + { + # Wait for a second. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0484", variables => { daemon => $daemon }}); + sleep 1; + } + } + } + } + elsif ($return_code eq "0") + { + # Running, nothing to do. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0485", variables => { daemon => $daemon }}); + } + + ### TODO: Left off here. + if ($peer_ready) + { + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + if ($return_code eq "3") + { + # Stopped, start it.. 
+ print "Starting: [".$daemon."] on: [".$peer_name."]\n"; + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + + my $loops = 0; + my $running = 0; + until ($running) + { + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + if ($return_code eq "0") + { + $running = 1; + print "Verified start of: [".$daemon."] on: [".$peer_name."]\n"; + } + else + { + $loops++; + if ($loops > 3) + { + # Give up, exiting the same way the local start failure does so the exit code is predictable. + print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n"; + $anvil->nice_exit({exit_code => 1}); + } + else + { + # Wait for a second. + sleep 1; + print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n"; + } + } + } + } + elsif ($return_code eq "0") + { + # Running, nothing to do. + print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n"; + } + } + } + } + if ($task eq "stop") + { + my $stop = 0; + + # Check both nodes if a server is running on either node. + my $local_vm_count = 0; + my $remote_vm_count = 0; + + # Call virsh list --all + my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." 
list --all"}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_output => $local_output, + local_return_code => $local_return_code, + }}); + if (not $local_return_code) + { + # Parse output + foreach my $line (split/\n/, $local_output) + { + $line = $anvil->Words->clean_spaces({ string => $line }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); + + if ($line =~ /(\d+)\s+(.*?)\s+running/) + { + $local_vm_count++; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_vm_count => $local_vm_count }}); + } + } + } + + my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{virsh}." list --all", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + remote_output => $remote_output, + remote_error => $remote_error, + remote_return_code => $remote_return_code, + }}); + if (not $remote_return_code) + { + # Parse output + foreach my $line (split/\n/, $remote_output) + { + $line = $anvil->Words->clean_spaces({ string => $line }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); + + if ($line =~ /(\d+)\s+(.*?)\s+running/) + { + $remote_vm_count++; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remote_vm_count => $remote_vm_count }}); + } + } + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_vm_count => $local_vm_count, + remote_vm_count => $remote_vm_count, + }}); + if ((not $local_vm_count) && (not $remote_vm_count)) + { + print "No servers running on either node, stopping daemons.\n"; + foreach my $daemon ("libvirtd.service", "drbd.service") + { + my $running_local = 0; + my $running_peer = 0; + + my ($local_output, $local_return_code) = 
$anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_output => $local_output, + local_return_code => $local_return_code, + }}); + if ($local_return_code eq "3") + { + # Already stopped. + print "The daemon: [".$daemon."] is already stopped locally.\n"; + } + elsif ($local_return_code eq "0") + { + # Running, stop it. + print "Stopping: [".$daemon."] locally\n"; + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + } + + my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + remote_output => $remote_output, + remote_error => $remote_error, + remote_return_code => $remote_return_code, + }}); + if ($remote_return_code eq "3") + { + # Already stopped. + print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n"; + } + elsif ($remote_return_code eq "0") + { + # Running, stop it. + print "Stopping: [".$daemon."] on: [".$peer_name."]\n"; + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + } + } + } + } + + + return(0); +} + =cut STATES @@ -296,6 +575,9 @@ sub start_server { my ($anvil) = @_; + # Before we do anything, make sure that 'libvirtd' and 'drbd' services are running. 
+ check_services($anvil, "start"); + # Start procedure; # 1. Read the XML definition file and find the backing storage and bridges. Soft error if read fails. # 2. Make sure the name matches. @@ -310,7 +592,6 @@ sub start_server # 6.4. Make sure the backing device is 'Connected' or 'Connecting'. Call a connect if not. # 7. Make sure all bridges exist and soft error if not. # 8. Start the server. - my $server = $anvil->data->{environment}{OCF_RESKEY_name}; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0303", variables => { server => $server }}); diff --git a/share/words.xml b/share/words.xml index 7bb5892d..b3b16c53 100644 --- a/share/words.xml +++ b/share/words.xml @@ -192,6 +192,8 @@ The error was: The answer: [#!variable!answer!#] is invalid. Please try again. The host UUID: [#!variable!host_uuid!#] was not found. Has it already been purged? Failed to remove the symlink: [#!variable!symlink!#]! + Failed to read or parse the CIB! Is pacemaker running? + Failed to start the daemon: [#!variable!daemon!#] on the local system, unable to boot the server. Current Network Interfaces and States @@ -341,7 +343,14 @@ Failure! The return code: [#!variable!return_code!#] was received ('0' was expec No existing cluster found, will run initial setup. The corosync.conf file does not exist locally, but it does exist on the peer. Copying the file to here. - Starting the cluster now... + Starting the cluster (on both nodes) now. + We're node 2, so we will wait until the peer starts the cluster. + Both nodes are up! + Still waiting. Node 1: [#!variable!node1_name!#] ready: [#!variable!node1_ready!#] (in_ccm/crmd/join: [#!variable!node1_in_ccm!#/#!variable!node1_crmd!#/#!variable!node1_join!#]), Node 2: [#!variable!node2_name!#] ready: [#!variable!node2_ready!#] (in_ccm/crmd/join: [#!variable!node2_in_ccm!#/#!variable!node2_crmd!#/#!variable!node2_join!#]) + Cluster hasn't started, calling local start. 
+ Corosync is not yet configured, waiting. It will be created when node 1 initializes the cluster. + Corosync is configured. Will wait for the cluster to start. If it hasn't started in two minutes, we'll try to join it. + We will now wait for the cluster to start. Starting: [#!variable!program!#]. @@ -904,6 +913,10 @@ If the targets are unique, did you copy the full database directory? A unique id Removing the symlink: [#!variable!symlink!#]. Updating the cache state file. [ Note ] - The host: [#!variable!host!#] entry in /etc/hosts has changed IP from: [#!variable!old_ip!#] to: [#!variable!new_ip!#]. + Starting the daemon: [#!variable!daemon!#] locally. + Verifying that the daemon: [#!variable!daemon!#] has started. + Waiting for the daemon: [#!variable!daemon!#] to start... + The daemon: [#!variable!daemon!#] was already running locally, no need to start. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-join-anvil b/tools/anvil-join-anvil index c88dffb8..5ae0164c 100755 --- a/tools/anvil-join-anvil +++ b/tools/anvil-join-anvil @@ -33,14 +33,14 @@ $| = 1; my $anvil = Anvil::Tools->new(); $anvil->Log->level({set => 2}); $anvil->Log->secure({set => 1}); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0115", variables => { program => $THIS_FILE }}); # Read switches (target ([user@]host[:port]) and the file with the target's password. If the password is # passed directly, it will be used. Otherwise, the password will be read from the database. 
$anvil->Get->switches; $anvil->Database->connect(); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { # No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try @@ -87,9 +87,6 @@ sub configure_pacemaker my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name; my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; my $escaped_password = shell_quote($new_password); - my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { machine => $machine, anvil_uuid => $anvil_uuid, @@ -104,7 +101,6 @@ sub configure_pacemaker peer_host_uuid => $peer_host_uuid, new_password => $anvil->Log->is_secure($new_password), escaped_password => $anvil->Log->is_secure($escaped_password), - auth_shell_call => $anvil->Log->is_secure($auth_shell_call), }}); # If this is a DR box, we don't use pacemaker. @@ -171,21 +167,96 @@ sub configure_pacemaker } } - ### Run on node 1 only. + # Node 1 initializes, node 2 waits. if ($machine eq "node2") { + my $start_time = 0; + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0103"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0103"}); + # We loop until the peer finishes or the peer's job hit's 100. 
+ my $tried_starting = 0; + my $both_online = 0; + until($both_online) + { + if (-e $anvil->data->{path}{configs}{'corosync.conf'}) + { + if (not $start_time) + { + # Corosync is configured, we'll wait up to two minutes and then try + # joining the cluster ourselves. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0108"}); + $start_time = time + 120; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { start_time => $start_time }}); + } + elsif ((time > $start_time) && (not $tried_starting)) + { + # The two minute deadline held in 'start_time' has passed, so try starting the cluster ourselves (once only). + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0106"}); + my $cluster_started = $anvil->Cluster->start_cluster({debug => 2, all => 1}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_started => $cluster_started }}); + + # Mark that we've tried to start, so the start call is not repeated on later passes. + $tried_starting = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { tried_starting => $tried_starting }}); + } + + my $problem = $anvil->Cluster->parse_cib({debug => 2}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if (not $problem) + { + # See if both nodes are online.
+ my $node1_ready = $anvil->Cluster->check_node_status({node_name => $node1_host_name}); + my $node2_ready = $anvil->Cluster->check_node_status({node_name => $node2_host_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + node1_ready => $node1_ready, + node2_ready => $node2_ready, + }}); + if (($node1_ready) && ($node2_ready)) + { + $both_online = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { both_online => $both_online }}); + + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0104"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0104"}); + } + else + { + # Not online yet; report each node's ready/in_ccm/crmd/join flags under the matching message variable, then wait a bit. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0105", variables => { + node1_name => $node1_host_name, + node1_ready => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{ready}, + node1_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{in_ccm}, + node1_crmd => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{crmd}, + node1_join => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{'join'}, + node2_name => $node2_host_name, + node2_ready => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{ready}, + node2_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{in_ccm}, + node2_crmd => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{crmd}, + node2_join => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{'join'}, + }}); + } + } + } + else + { + # corosync.conf doesn't exist yet. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0107"}); + } + sleep 5 if not $both_online; + } } else { - # Proceed with cluster setup.
- + # We're node 1, proceed with cluster setup. my $waiting = 1; my $warning_printed = 0; while($waiting) { + # Try to authenticate against the peer. + my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password; my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code, }}); @@ -221,10 +292,10 @@ sub configure_pacemaker $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0099", variables => { anvil_name => $anvil_name }}); my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster setup ".$anvil_name." ".$node1_host_name." ".$node2_host_name; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code, }}); @@ -237,46 +308,64 @@ sub configure_pacemaker } } - # Now, if we can read the CIB, see where the setup is. If not, start by setting up the - # cluster. - my $cib_data = ""; - my $cluster_started = 0; - until ($cib_data) + # If we can parse the CIB, then pcsd is running. 
+ my $problem = $anvil->Cluster->parse_cib({debug => 2}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) { - my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }}); + # Start the cluster. + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0102"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0102"}); - ($cib_data, my $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { - cib_data => $cib_data, - return_code => $return_code, - }}); - if ($return_code) + my $cluster_started = $anvil->Cluster->start_cluster({debug => 2, all => 1}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_started => $cluster_started }}); + } + + # Now wait for both nodes to come online. + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0109"); + my $both_online = 0; + until ($both_online) + { + my $problem = $anvil->Cluster->parse_cib({debug => 2}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if (not $problem) { - if (not $cluster_started) + # See if both nodes are online. + my $node1_ready = $anvil->Cluster->check_node_status({node_name => $node1_host_name}); + my $node2_ready = $anvil->Cluster->check_node_status({node_name => $node2_host_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + node1_ready => $node1_ready, + node2_ready => $node2_ready, + }}); + if (($node1_ready) && ($node2_ready)) { - # Start the cluster. 
- update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0102"); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0102"}); - - $cluster_started = 1; - my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { - cluster_started => $cluster_started, - shell_call => $shell_call, - }}); + $both_online = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { both_online => $both_online }}); - my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { - output => $output, - return_code => $return_code, + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0104"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0104"}); + } + else + { + # Not online yet, wait a bit. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0105", variables => { + node1_name => $node1_host_name, + node1_ready => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{ready}, + node1_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{in_ccm}, + node1_crmd => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{crmd}, + node1_join => $anvil->data->{cib}{parsed}{data}{node}{$node1_host_name}{node_state}{'join'}, + node2_name => $node2_host_name, + node2_ready => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{ready}, + node2_in_ccm => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{in_ccm}, + node2_crmd => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{crmd}, + node2_join => $anvil->data->{cib}{parsed}{data}{node}{$node2_host_name}{node_state}{'join'}, }}); } } - die; + sleep 5 if not $both_online; } - die; } + die; =cut $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}; $VAR1 = { @@ -957,14 +1046,14 @@ sub check_local_network if ($restart_interface_count) { # Disconnect from the database, as we're about to tear down our connection. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0079"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0079"}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0079"); $anvil->Database->disconnect(); # Tell nmcli to re-read the config files. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "log_0463"}); my ($output, $return_code) = $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{nmcli}."
connection reload"}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $output, return_code => $return_code, }}); @@ -984,7 +1073,7 @@ sub check_local_network { $anvil->refresh(); $anvil->Database->connect(); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, secure => 0, key => "log_0132"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, 'print' => 1, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { if (time > $wait_until) @@ -999,7 +1088,7 @@ sub check_local_network } } - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0084"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0084"}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0084"); } @@ -1007,13 +1096,13 @@ sub check_local_network if (exists $anvil->data->{network}{'local'}{interface}{virbr0}) { # Remove the NAT'ed bridge - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0085"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0085"}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0085"); $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{virsh}." net-destroy default"}); $anvil->System->call({debug => 2, shell_call => $anvil->data->{path}{exe}{virsh}." 
net-undefine default "}); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0034"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0034"}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0034"); } @@ -1021,7 +1110,7 @@ sub check_local_network $anvil->Network->read_nmcli({debug => 2}); $anvil->Network->get_ips({debug => 3}); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0086"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0086"}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0086"); # Update MTUs (running interface and config) if needed. @@ -1102,7 +1191,7 @@ sub check_local_network overwrite => 1, }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, secure => 0, key => "job_0034"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0034"}); update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0034"); } } @@ -1126,7 +1215,7 @@ sub check_local_network my $old_config = $anvil->Storage->read_file({file => $anvil->data->{path}{data}{'chrony.conf'}}); foreach my $line (split/\n/, $old_config) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { line => $line }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); $new_config .= $line."\n"; if ($line =~ /^Server (.*)$/) { diff --git a/tools/test.pl b/tools/test.pl index dea61171..850c89b2 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -21,8 +21,265 @@ my $anvil = Anvil::Tools->new(); $anvil->Log->level({set => 2}); $anvil->Log->secure({set => 1}); -# print "Connecting to the database(s);\n"; -# $anvil->Database->connect(); -# 
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"}); +print "Connecting to the database(s);\n"; +$anvil->Database->connect(); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"}); -$anvil->Cluster->parse_cib({debug => 2}); +$anvil->data->{switches}{start} = ""; +$anvil->data->{switches}{stop} = ""; +$anvil->Get->switches; + +my $peer = $anvil->Cluster->get_peers(); +my $i_am = $anvil->data->{sys}{anvil}{i_am}; +my $peer_is = $anvil->data->{sys}{anvil}{peer_is}; +my $my_name = $i_am ? $anvil->data->{sys}{anvil}{$i_am}{host_name} : "--"; +my $peer_name = $peer_is ? $anvil->data->{sys}{anvil}{$peer_is}{host_name} : "--"; +print "I am: .. [".$i_am."], my host name is: . [".$my_name."]\n"; +print "Peer is: [".$peer_is."], peer host name is: [".$peer_name."]\n"; +print "- Returned peer: [".$peer."]\n"; + +if ($anvil->data->{switches}{start}) +{ + foreach my $daemon ("libvirtd.service", "drbd.service") + { + my $running_local = 0; + my $running_peer = 0; + + my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_output => $local_output, + local_return_code => $local_return_code, + }}); + if ($local_return_code eq "3") + { + # Stopped, start it.. + print "Starting: [".$daemon."] locally\n"; + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + + my $loops = 0; + my $running = 0; + until ($running) + { + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." 
status ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code eq "0") + { + $running = 1; + print "Verified start of: [".$daemon."]\n"; + } + else + { + $loops++; + if ($loops > 3) + { + # Give up + print "[ Error ] - Start of: [".$daemon."] appears to have failed!\n"; + die; + } + else + { + # Wait for a second. + sleep 1; + print "Waiting for: [".$daemon."] to start...\n"; + } + } + } + } + elsif ($local_return_code eq "0") + { + # Running, nothing to do. + print "The daemon: [".$daemon."] is already running locally.\n"; + } + + my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + remote_output => $remote_output, + remote_error => $remote_error, + remote_return_code => $remote_return_code, + }}); + if ($remote_return_code eq "3") + { + # Stopped, start it.. + print "Starting: [".$daemon."] on: [".$peer_name."]\n"; + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + + my $loops = 0; + my $running = 0; + until ($running) + { + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." 
status ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + if ($return_code eq "0") + { + $running = 1; + print "Verified start of: [".$daemon."] on: [".$peer_name."]\n"; + } + else + { + $loops++; + if ($loops > 3) + { + # Give up + print "[ Error ] - Start of: [".$daemon."] on: [".$peer_name."] appears to have failed!\n"; + die; + } + else + { + # Wait for a second. + sleep 1; + print "Waiting for: [".$daemon."] to start on: [".$peer_name."]...\n"; + } + } + } + } + elsif ($remote_return_code eq "0") + { + # Running, nothing to do. + print "The daemon: [".$daemon."] is already running on: [".$peer_name."].\n"; + } + } +} +elsif ($anvil->data->{switches}{stop}) +{ + my $stop = 0; + + # Check both nodes if a server is running on either node. + my $local_vm_count = 0; + my $remote_vm_count = 0; + + # Call virsh list --all + my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list --all"}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_output => $local_output, + local_return_code => $local_return_code, + }}); + if (not $local_return_code) + { + # Parse output + foreach my $line (split/\n/, $local_output) + { + $line = $anvil->Words->clean_spaces({ string => $line }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); + + if ($line =~ /(\d+)\s+(.*?)\s+running/) + { + $local_vm_count++; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_vm_count => $local_vm_count }}); + } + } + } + + my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{virsh}." 
list --all", + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + remote_output => $remote_output, + remote_error => $remote_error, + remote_return_code => $remote_return_code, + }}); + if (not $remote_return_code) + { + # Parse output + foreach my $line (split/\n/, $remote_output) + { + $line = $anvil->Words->clean_spaces({ string => $line }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); + + if ($line =~ /(\d+)\s+(.*?)\s+running/) + { + $remote_vm_count++; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { remote_vm_count => $remote_vm_count }}); + } + } + } + + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_vm_count => $local_vm_count, + remote_vm_count => $remote_vm_count, + }}); + if ((not $local_vm_count) && (not $remote_vm_count)) + { + print "No servers running on either node, stopping daemons.\n"; + foreach my $daemon ("libvirtd.service", "drbd.service") + { + my $running_local = 0; + my $running_peer = 0; + + my ($local_output, $local_return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + local_output => $local_output, + local_return_code => $local_return_code, + }}); + if ($local_return_code eq "3") + { + # Already stopped. + print "The daemon: [".$daemon."] is already stopped locally.\n"; + } + elsif ($local_return_code eq "0") + { + # Running, stop it. + print "Stopping: [".$daemon."] locally\n"; + my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." 
stop ".$daemon}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + } + + my ($remote_output, $remote_error, $remote_return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + remote_output => $remote_output, + remote_error => $remote_error, + remote_return_code => $remote_return_code, + }}); + if ($remote_return_code eq "3") + { + # Already stopped. + print "The daemon: [".$daemon."] is already stopped on: [".$peer_name."].\n"; + } + elsif ($remote_return_code eq "0") + { + # Running, stop it. + print "Stopping: [".$daemon."] on: [".$peer_name."]\n"; + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_name, + shell_call => $anvil->data->{path}{exe}{systemctl}." stop ".$daemon, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + error => $error, + return_code => $return_code, + }}); + } + } + } +}