* Got anvil-safe-start to the point where it starts the cluster stack. Need to create the 'anvil-boot-server' and 'anvil-shutdown-server' before it can be completed, so those files have been added.

* Created Cluster->parse_quorum() to check if a node is quorate as 'have-quorum' in the pacemaker CIB doesn't appear to be super accurate during startup.
* Fixed a bug in striker-manage-install-target where if a node didn't have any registered IPs, it would break before generating the repo data.
* Fixed a bug in anvil-join-anvil where if the database had to be reconnected, the job data was lost.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent faf1399440
commit e036515df3
  1. 1
      Anvil/Tools.pm
  2. 111
      Anvil/Tools/Cluster.pm
  3. 12
      Anvil/Tools/Striker.pm
  4. 2
      Anvil/Tools/System.pm
  5. 11
      share/words.xml
  6. 2
      tools/Makefile.am
  7. 47
      tools/anvil-boot-server
  8. 9
      tools/anvil-join-anvil
  9. 178
      tools/anvil-safe-start
  10. 0
      tools/anvil-shutdown-server
  11. 12
      tools/striker-manage-install-target

@ -1115,6 +1115,7 @@ sub _set_paths
'chown' => "/usr/bin/chown", 'chown' => "/usr/bin/chown",
chronyc => "/usr/bin/chronyc", chronyc => "/usr/bin/chronyc",
cibadmin => "/usr/sbin/cibadmin", cibadmin => "/usr/sbin/cibadmin",
'corosync-quorumtool' => "/usr/sbin/corosync-quorumtool",
cp => "/usr/bin/cp", cp => "/usr/bin/cp",
createdb => "/usr/bin/createdb", createdb => "/usr/bin/createdb",
createrepo_c => "/usr/bin/createrepo_c", createrepo_c => "/usr/bin/createrepo_c",

@ -26,6 +26,8 @@ my $THIS_FILE = "Cluster.pm";
# is_primary # is_primary
# migrate_server # migrate_server
# parse_cib # parse_cib
# parse_crm_mon
# parse_quorum
# shutdown_server # shutdown_server
# start_cluster # start_cluster
# which_node # which_node
@ -2261,7 +2263,7 @@ sub parse_cib
# call is to determine what resources are running, and where they are running. # call is to determine what resources are running, and where they are running.
$anvil->Cluster->parse_crm_mon({ $anvil->Cluster->parse_crm_mon({
debug => $debug, debug => $debug,
password => $anvil->Log->is_secure($password), password => $password,
port => $port, port => $port,
remote_user => $remote_user, remote_user => $remote_user,
target => $target, target => $target,
@ -2520,6 +2522,113 @@ sub parse_crm_mon
} }
=head2 parse_quorum
This parses C<< corosync-quorumtool -s -p >> to check the status of quorum, as it is more reliable than the CIB's C<< have-quorum >> flag. This does not parse out per-node information.
B<< Note >>: See C<< man corosync-quorumtool >> for details on what these values store.
If the cluster is down, C<< 1 >> is returned. Otherwise, C<< 0 >> is returned.
Data is stored as:
quorum::expected-votes
quorum::flags
quorum::nodes
quorum::quorate
quorum::ring_id
quorum::total-votes
This method takes no parameters.
=cut
sub parse_quorum
{
	# Calls 'corosync-quorumtool -p -s' and stores the summary values found in its output under the
	# 'quorum::' branch of the shared data hash (see the list of keys reset below).
	# 
	# Parameters;
	#   debug (optional, default '3') - The log level used for this method's log entries, and passed on to
	#                                   the system call.
	# 
	# Returns;
	#   1 - The call exited with a non-zero return code, which is treated as the cluster being down. No
	#       'quorum::' data is changed in this case.
	#   0 - The output was parsed. Any value not seen in the output is left as an empty string.
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 3;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->parse_quorum()" }});
	
	my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $anvil->data->{path}{exe}{'corosync-quorumtool'}." -p -s"});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
		output      => $output,
		return_code => $return_code,
	}});
	
	# NOTE(review): Any non-zero exit is treated as "cluster is down". Some versions of corosync-quorumtool 
	#               may also exit non-zero when the node is up but inquorate - confirm against the man page.
	if ($return_code)
	{
		# Cluster is down
		return(1);
	}
	
	# Clear any values from a previous call so stale data can't be mistaken for current data.
	$anvil->data->{quorum}{'expected-votes'} = "";
	$anvil->data->{quorum}{flags}            = "";
	$anvil->data->{quorum}{nodes}            = "";
	$anvil->data->{quorum}{quorate}          = "";
	$anvil->data->{quorum}{ring_id}          = "";
	$anvil->data->{quorum}{'total-votes'}    = "";
	
	foreach my $line (split/\n/, $output)
	{
		$line = $anvil->Words->clean_spaces({string => $line});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }});
		
		if ($line =~ /Expected votes:\s+(\d+)$/)
		{
			$anvil->data->{quorum}{'expected-votes'} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				"quorum::expected-votes" => $anvil->data->{quorum}{'expected-votes'},
			}});
			next;
		}
		if ($line =~ /Flags:\s+(.*)$/)
		{
			$anvil->data->{quorum}{flags} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				"quorum::flags" => $anvil->data->{quorum}{flags},
			}});
			next;
		}
		if ($line =~ /Nodes:\s+(\d+)$/)
		{
			$anvil->data->{quorum}{nodes} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				"quorum::nodes" => $anvil->data->{quorum}{nodes},
			}});
			next;
		}
		if ($line =~ /Quorate:\s+(.*)$/)
		{
			# Stored as a boolean; '1' only when the tool reports 'Yes' (any case).
			$anvil->data->{quorum}{quorate} = lc($1) eq "yes" ? 1 : 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				"quorum::quorate" => $anvil->data->{quorum}{quorate},
			}});
			next;
		}
		if ($line =~ /Ring ID:\s+(.*)$/)
		{
			$anvil->data->{quorum}{ring_id} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				"quorum::ring_id" => $anvil->data->{quorum}{ring_id},
			}});
			next;
		}
		# This previously re-tested 'Nodes:', which could never match here as the 'Nodes:' check above 
		# calls 'next', so 'quorum::total-votes' was never set.
		if ($line =~ /Total votes:\s+(\d+)$/)
		{
			$anvil->data->{quorum}{'total-votes'} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
				"quorum::total-votes" => $anvil->data->{quorum}{'total-votes'},
			}});
			next;
		}
	}
	
	return(0);
}
=head2 shutdown_server =head2 shutdown_server
This shuts down a server that is running on the Anvil! system. If there is a problem, C<< !!error!! >> is returned. On success, C<< 0 >> is returned. This shuts down a server that is running on the Anvil! system. If there is a problem, C<< !!error!! >> is returned. On success, C<< 0 >> is returned.

@ -1025,14 +1025,14 @@ WHERE
"manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::ipmi_ip" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ipmi_ip}, "manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::ipmi_ip" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ipmi_ip},
}}); }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"ref(parsed_xml->{machines}{$machine}{upses}{ups})" => ref($parsed_xml->{machines}{$machine}{upses}{ups}), "ref(parsed_xml->{machines}{$machine}{upses}{ups})" => ref($parsed_xml->{machines}{$machine}{upses}{ups}),
}}); }});
if (ref($parsed_xml->{machines}{$machine}{upses}{ups}) eq "HASH") if (ref($parsed_xml->{machines}{$machine}{upses}{ups}) eq "HASH")
{ {
my $ups_name = $parsed_xml->{machines}{$machine}{upses}{ups}{name}; my $ups_name = $parsed_xml->{machines}{$machine}{upses}{ups}{name};
$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used} = $parsed_xml->{machines}{$machine}{upses}{ups}{used}; $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used} = $parsed_xml->{machines}{$machine}{upses}{ups}{used};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::ups::${ups_name}::used" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used}, "manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::ups::${ups_name}::used" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used},
}}); }});
} }
@ -1042,20 +1042,20 @@ WHERE
{ {
my $ups_name = $hash_ref->{name}; my $ups_name = $hash_ref->{name};
$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used} = $hash_ref->{used}; $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used} = $hash_ref->{used};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::ups::${ups_name}::used" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used}, "manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::ups::${ups_name}::used" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{ups}{$ups_name}{used},
}}); }});
} }
} }
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"ref(parsed_xml->{machines}{$machine}{fences}{fence})" => ref($parsed_xml->{machines}{$machine}{fences}{fence}), "ref(parsed_xml->{machines}{$machine}{fences}{fence})" => ref($parsed_xml->{machines}{$machine}{fences}{fence}),
}}); }});
if (ref($parsed_xml->{machines}{$machine}{fences}{fence}) eq "HASH") if (ref($parsed_xml->{machines}{$machine}{fences}{fence}) eq "HASH")
{ {
my $fence_name = $parsed_xml->{machines}{$machine}{fences}{fence}{name}; my $fence_name = $parsed_xml->{machines}{$machine}{fences}{fence}{name};
$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port} = $parsed_xml->{machines}{$machine}{fences}{fence}{port}; $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port} = $parsed_xml->{machines}{$machine}{fences}{fence}{port};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::fence::${fence_name}::port" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port}, "manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::fence::${fence_name}::port" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port},
}}); }});
} }
@ -1065,7 +1065,7 @@ WHERE
{ {
my $fence_name = $hash_ref->{name}; my $fence_name = $hash_ref->{name};
$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port} = $hash_ref->{port}; $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port} = $hash_ref->{port};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::fence::${fence_name}::port" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port}, "manifests::manifest_uuid::${manifest_uuid}::parsed::machine::${machine}::fence::${fence_name}::port" => $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{machine}{$machine}{fence}{$fence_name}{port},
}}); }});
} }

@ -4768,7 +4768,7 @@ sub update_hosts
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0481", variables => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0481", variables => {
old_ip => $current_ip, old_ip => $current_ip,
new_ip => $ip_address, new_ip => $ip_address,
name => $name, host => $name,
}}); }});
$changes = 1; $changes = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }});

@ -357,6 +357,12 @@ The error was:
</key> </key>
<key name="error_0254">Failed to read the lvm.conf file. The reason why should be logged above.</key> <key name="error_0254">Failed to read the lvm.conf file. The reason why should be logged above.</key>
<key name="error_0255">Failed to write the lvm.conf file. The reason why should be logged above.</key> <key name="error_0255">Failed to write the lvm.conf file. The reason why should be logged above.</key>
<key name="error_0256">
The attempt to start the cluster appears to have failed. The return code '0' was expected, but: [#!variable!return_code!#] was received. The output was:
====
#!variable!output!#
====
</key>
<!-- Files templates --> <!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which likes are translatable --> <!-- NOTE: Translating these files requires an understanding of which likes are translatable -->
@ -1450,6 +1456,11 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0605">Failed to acess over the peer: [#!variable!peer!#] over the network: [#!variable!network!#] via the peer's IP: [#!variable!peer_ip!#].</key> <key name="log_0605">Failed to acess over the peer: [#!variable!peer!#] over the network: [#!variable!network!#] via the peer's IP: [#!variable!peer_ip!#].</key>
<key name="log_0606">At least one network connection to the peer: [#!variable!peer!#] is still down. Waiting a bit and then will check again.</key> <key name="log_0606">At least one network connection to the peer: [#!variable!peer!#] is still down. Waiting a bit and then will check again.</key>
<key name="log_0607">All connections to the peer: [#!variable!peer!#] are up!</key> <key name="log_0607">All connections to the peer: [#!variable!peer!#] are up!</key>
<key name="log_0608">The cluster does not appear to be running, starting it now.</key>
<key name="log_0609">The cluster isn't up yet, waiting a bit before checking again.</key>
<key name="log_0610">We're online as: [#!variable!node_name!#], but we're not quorate yet. Continuing to wait.</key>
<key name="log_0611">We're online as: [#!variable!node_name!#] and quorate!</key>
<key name="log_0612">We're not online yet. Waiting for 'in_ccm/crmd/join': [#!variable!in_ccm!#/#!variable!crmd!#/#!variable!join!#]. ('in_ccm' = consensus cluster member, communication layer. 'crmd' = cluster resource manager daemon is up, 'join' = allowed to host resources).</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>

@ -5,6 +5,7 @@ EXTRA_DIST = \
watch_drbd watch_drbd
dist_sbin_SCRIPTS = \ dist_sbin_SCRIPTS = \
anvil-boot-server \
anvil-change-password \ anvil-change-password \
anvil-check-memory \ anvil-check-memory \
anvil-configure-host \ anvil-configure-host \
@ -24,6 +25,7 @@ dist_sbin_SCRIPTS = \
anvil-provision-server \ anvil-provision-server \
anvil-safe-start \ anvil-safe-start \
anvil-scan-network \ anvil-scan-network \
anvil-shutdown-server \
anvil-sync-shared \ anvil-sync-shared \
anvil-update-issue \ anvil-update-issue \
anvil-update-states \ anvil-update-states \

@ -0,0 +1,47 @@
#!/usr/bin/perl
# 
# This program boots a server. It can be called as either a job from the webui or directly from another
# program or a terminal.
# 
# Switches;
# --job-uuid = Read into 'switches::job-uuid'; defaults to an empty string when not passed.
# 
# Exit codes;
# 0 = Normal exit.
# 1 = No database connection.
# 
# NOTE(review): This is an incomplete placeholder. It ends by calling 'load_job_data()', which is not 
#               defined anywhere in the visible file - confirm it is added before this program is used.
# 
use strict;
use warnings;
use Anvil::Tools;
# The program's name is whatever follows the last '/' in '$0'.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
# The directory portion of '$0' (everything before '/<program name>').
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
# If the directory starts with '.', swap that leading '.' for the current working directory from the 
# environment (only when 'PWD' is set).
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
$running_directory =~ s/^\./$ENV{PWD}/;
}
# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;
my $anvil = Anvil::Tools->new();
# Pre-set the switch to an empty string before the command line switches are read.
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
}});
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
# No databases; log the error (key 'error_0077'), sleep for a bit and then exit with code '1'. The 
# daemon will pick it up and try again after we exit.
# NOTE(review): An earlier comment here said the job is updated, but no job update is made in this block.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0077"});
sleep 10;
$anvil->nice_exit({exit_code => 1});
}
# Pick up the job details
load_job_data($anvil);

@ -146,7 +146,7 @@ sub configure_pacemaker
### TODO: Move these to variables in the 'sys' hash ### TODO: Move these to variables in the 'sys' hash
my $anvil_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{name}; my $anvil_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{name};
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid}; my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $host_name = $anvil->data->{sys}{host_name}; my $host_name = $anvil->Get->host_name;
my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
my $node1_host_uuid = $anvil->data->{sys}{node1_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; my $node1_host_uuid = $anvil->data->{sys}{node1_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name}; my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name};
@ -157,7 +157,7 @@ sub configure_pacemaker
my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name; my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name;
my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
my $escaped_password = shell_quote($new_password); my $escaped_password = shell_quote($new_password);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
machine => $machine, machine => $machine,
anvil_uuid => $anvil_uuid, anvil_uuid => $anvil_uuid,
anvil_name => $anvil_name, anvil_name => $anvil_name,
@ -333,7 +333,7 @@ sub configure_pacemaker
{ {
# Try to authenticate against the peer. # Try to authenticate against the peer.
my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password; my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, secure => 1, list => { auth_shell_call => $auth_shell_call }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 1, list => { auth_shell_call => $auth_shell_call }});
my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call}); my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
@ -1665,6 +1665,9 @@ sub check_local_network
# No databases, sleep and then try again. # No databases, sleep and then try again.
sleep 2; sleep 2;
} }
# reload the job data.
load_job($anvil);
} }
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0084"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, 'print' => 1, key => "job_0084"});

@ -13,6 +13,7 @@
# #
# TODO: # TODO:
# - Make this work on DR hosts. # - Make this work on DR hosts.
# - 'pcs quorum unblock' could be useful in sole-survivor cold starts.
# #
use strict; use strict;
@ -33,7 +34,7 @@ $| = 1;
my $anvil = Anvil::Tools->new(); my $anvil = Anvil::Tools->new();
$anvil->Get->switches; $anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# Make sure we're running as 'root' # Make sure we're running as 'root'
# $< == real UID, $> == effective UID # $< == real UID, $> == effective UID
@ -55,7 +56,6 @@ $anvil->data->{switches}{force} = "";
$anvil->data->{switches}{'local'} = ""; $anvil->data->{switches}{'local'} = "";
$anvil->data->{switches}{status} = ""; $anvil->data->{switches}{status} = "";
$anvil->Get->switches; $anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
# If I have no databases, sleep until I do # If I have no databases, sleep until I do
@ -75,7 +75,7 @@ if (not $anvil->data->{sys}{database}{connections})
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})
{ {
# Keep waiting # Keep waiting
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, secure => 0, key => "log_0439"});
} }
} }
} }
@ -95,6 +95,12 @@ prerun_checks($anvil);
# networks. There is no timeout. # networks. There is no timeout.
wait_for_access($anvil); wait_for_access($anvil);
# Start pacemaker now.
start_pacemaker($anvil);
# Boot servers.
boot_servers($anvil);
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});
@ -102,6 +108,170 @@ $anvil->nice_exit({exit_code => 0});
# Functions # # Functions #
############################################################################################################# #############################################################################################################
# Boots the servers. This is currently a placeholder; it does no work and always returns '0'.
sub boot_servers
{
	my $anvil = shift;
	
	### TODO: Boot ordering needs to be handled once the WebUI is at that stage. Until then, the plan is
	###       to blind-boot all servers.
	
	return 0;
}
# Start pacemaker (if it isn't already running) and wait until this node is both ready and quorate.
# 
# - If the CIB can't be parsed, the cluster is considered down and 'pcs cluster start' is called. If that 
#   call returns a non-zero return code, 'error_0256' is logged and the program exits with exit code '1'.
# - Whether or not the cluster had to be started, this then loops (checking every five seconds, with no 
#   timeout) until the CIB parses, quorum data can be read, the local node is 'ready' and corosync reports 
#   quorum.
# 
# Returns '0' once the node is online and quorate.
sub start_pacemaker
{
	my ($anvil) = @_;
	
	my $anvil_uuid           = $anvil->data->{sys}{anvil_uuid};
	my $host_uuid            = $anvil->Get->host_uuid();
	my $short_host_name      = $anvil->data->{hosts}{host_uuid}{$host_uuid}{short_host_name};
	my $peer_host_uuid       = $anvil->data->{sys}{peer_host_uuid};
	my $peer_short_host_name = $anvil->data->{hosts}{host_uuid}{$peer_host_uuid}{short_host_name};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		anvil_uuid           => $anvil_uuid,
		host_uuid            => $host_uuid,
		short_host_name      => $short_host_name,
		peer_host_uuid       => $peer_host_uuid,
		peer_short_host_name => $peer_short_host_name,
	}});
	
	# Is pacemaker already running?
	my ($problem) = $anvil->Cluster->parse_cib({debug => 3});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	if ($problem)
	{
		# Nope, start it.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0608"});
		
		### TODO: A lot more testing is needed for degraded single-node start later.
		my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
		
		my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			output      => $output,
			return_code => $return_code,
		}});
		if ($return_code)
		{
			# What?! Fail out, we're done.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0256", variables => { 
				output      => $output,
				return_code => $return_code,
			}});
			$anvil->nice_exit({exit_code => 1});
		}
	}
	
	### NOTE: The wait below runs even when the cluster was already up, so that we never return before this
	###       node is confirmed ready and quorate. When everything is already fine, the loop exits on its
	###       first pass.
	
	### TODO: We may implement the logic to fence our peer (similar to cman's 'post_join_delay' logic) at a
	###       later time. For now, we'll wait forever for this to exit. This is why we set 'wait_for_peer',
	###       even though it's not used yet.
	# The intent is to wait up to two minutes for the cluster to start and, if it's not up by then, fence 
	# the peer and, if the fence succeeds, unblock quorum.
	my $start_time    = time;
	my $wait_for_peer = $start_time + 120;
	my $waiting       = 1;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		start_time    => $start_time,
		wait_for_peer => $wait_for_peer,
	}});
	while ($waiting)
	{
		# Assume we're done; any check below that isn't satisfied sets this back to '1'.
		$waiting = 0;
		
		my ($cib_problem) = $anvil->Cluster->parse_cib({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $cib_problem }});
		if ($cib_problem)
		{
			# Can't parse the CIB yet, wait.
			$waiting = 1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
		else
		{
			# Quorum, as reported in the CIB, sets 'have-quorum' to '1' as soon as it starts, then 
			# retracts it. For this reason, we use 'parse_quorum()' to get the quorum directly from
			# corosync/votequorum.
			my ($quorum_problem) = $anvil->Cluster->parse_quorum({debug => 2});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $quorum_problem }});
			if ($quorum_problem)
			{
				# Corosync is down.
				$waiting = 1;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
			}
			else
			{
				### NOTE: We don't worry about maintenance mode yet, as it shouldn't apply, but we may
				###       change that view later.
				# See where we are. Missing values are treated as empty strings so that an incomplete
				# CIB doesn't generate 'uninitialized value' warnings below.
				my $node_name = $anvil->data->{cib}{parsed}{'local'}{name};
				   $node_name = "" if not defined $node_name;
				my %state;
				foreach my $key ("maintenance-mode", "in_ccm", "crmd", "join", "ready")
				{
					my $value       = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{$key};
					   $state{$key} = defined $value ? $value : "";
				}
				my $maintenance_mode = $state{'maintenance-mode'};
				my $in_ccm           = $state{in_ccm};
				my $crmd             = $state{crmd};
				my $join             = $state{'join'};
				my $ready            = $state{ready};
				my $quorate          = $anvil->data->{quorum}{quorate};
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					's1:node_name'        => $node_name,
					's2:maintenance_mode' => $maintenance_mode,
					's3:in_ccm/crmd/join' => $in_ccm."/".$crmd."/".$join,
					's4:ready'            => $ready,
					's5:quorate'          => $quorate,
				}});
				
				# Are we online?
				if ($ready)
				{
					# We're ready, but do we have quorum?
					if ($quorate)
					{
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0611", variables => { node_name => $node_name }});
					}
					else
					{
						# Nope
						$waiting = 1;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
						
						# Keep waiting, or fence the peer?
						if (time > $wait_for_peer)
						{
							### TODO: See above, not implemented yet.
							# Time to fence.
						}
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0610", variables => { node_name => $node_name }});
					}
				}
				else
				{
					# Not ready yet.
					$waiting = 1;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0612", variables => { 
						node_name => $node_name,
						in_ccm    => $in_ccm,
						crmd      => $crmd,
						'join'    => $join,
					}});
				}
			}
		}
		
		if ($waiting)
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, priority => "alert", key => "log_0609"});
			sleep 5;
		}
	}
	
	return(0);
}
# Check for which networks we have and verify that we can ping our peer on each. This function will not # Check for which networks we have and verify that we can ping our peer on each. This function will not
# return until all networks are up. # return until all networks are up.
sub wait_for_access sub wait_for_access
@ -274,9 +444,11 @@ sub prerun_checks
node2_host_uuid => $node2_host_uuid, node2_host_uuid => $node2_host_uuid,
}}); }});
$anvil->data->{sys}{anvil_uuid} = $anvil_uuid;
$anvil->data->{sys}{peer_host_uuid} = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; $anvil->data->{sys}{peer_host_uuid} = $host_uuid eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
$anvil->data->{sys}{peer_password} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; $anvil->data->{sys}{peer_password} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"sys::anvil_uuid" => $anvil->data->{sys}{anvil_uuid},
"sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid}, "sys::peer_host_uuid" => $anvil->data->{sys}{peer_host_uuid},
"sys::peer_password" => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}), "sys::peer_password" => $anvil->Log->is_secure($anvil->data->{sys}{peer_password}),
}}); }});

@ -1245,21 +1245,25 @@ ORDER BY
short_host_name => $short_host_name, short_host_name => $short_host_name,
}}); }});
$anvil->Network->load_ips({ $anvil->Network->load_ips({
debug => 3, debug => 2,
host_uuid => $host_uuid, host_uuid => $host_uuid,
host => $short_host_name, host => $short_host_name,
}); });
my $access = 0; my $access = 0;
my ($match) = $anvil->Network->find_matches({ my ($match) = $anvil->Network->find_matches({
debug => 3, debug => 2,
first => $local_short_host_name, first => $local_short_host_name,
second => $short_host_name, second => $short_host_name,
}); });
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { match => $match }});
if (ref($match) eq "HASH")
{
my $keys = keys %{$match}; my $keys = keys %{$match};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'keys' => $keys }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'keys' => $keys }});
if ($keys) }
if (ref($match) eq "HASH")
{ {
foreach my $interface (sort {$a cmp $b} keys %{$match->{$short_host_name}}) foreach my $interface (sort {$a cmp $b} keys %{$match->{$short_host_name}})
{ {

Loading…
Cancel
Save