* Created the skeleton Cluster.pm.

* Got anvil-join-anvil to the point where is initializes and starts the cluster.
* Deleted the old ssh key handling logic in anvil-daemon.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 5 years ago
parent 16157f7697
commit 597d9413a5
  1. 17
      Anvil/Tools.pm
  2. 102
      Anvil/Tools/Cluster.pm
  3. 2
      Anvil/Tools/Validate.pm
  4. 6
      notes
  5. 6
      share/words.xml
  6. 118
      tools/anvil-daemon
  7. 173
      tools/anvil-join-anvil
  8. 3
      tools/test.pl

@ -41,6 +41,7 @@ binmode(STDOUT, ':encoding(utf-8)');
# methods via their containing module's name. (A La: C<< $anvil->Module->method >> rather than C<< $anvil->method >>).
use Anvil::Tools::Account;
use Anvil::Tools::Alert;
use Anvil::Tools::Cluster;
use Anvil::Tools::Convert;
use Anvil::Tools::Database;
use Anvil::Tools::DRBD;
@ -123,6 +124,7 @@ sub new
HANDLE => {
ACCOUNT => Anvil::Tools::Account->new(),
ALERT => Anvil::Tools::Alert->new(),
CLUSTER => Anvil::Tools::Cluster->new(),
CONVERT => Anvil::Tools::Convert->new(),
DATABASE => Anvil::Tools::Database->new(),
DRBD => Anvil::Tools::DRBD->new(),
@ -165,6 +167,7 @@ sub new
# Get a handle on the various submodules
$anvil->Account->parent($anvil);
$anvil->Alert->parent($anvil);
$anvil->Cluster->parent($anvil);
$anvil->Convert->parent($anvil);
$anvil->Database->parent($anvil);
$anvil->DRBD->parent($anvil);
@ -469,6 +472,18 @@ sub Alert
return ($self->{HANDLE}{ALERT});
}
=head2 Cluster
Access the C<Cluster.pm> methods via 'C<< $anvil->Cluster->method >>'.
=cut
sub Cluster
{
my $self = shift;
return ($self->{HANDLE}{CLUSTER});
}
=head2 Convert
Access the C<Convert.pm> methods via 'C<< $anvil->Convert->method >>'.
@ -1056,6 +1071,7 @@ sub _set_paths
'anvil.conf' => "/etc/anvil/anvil.conf",
'anvil.version' => "/etc/anvil/anvil.version",
'autoindex.conf' => "/etc/httpd/conf.d/autoindex.conf",
'corosync.conf' => "/etc/corosync/corosync.conf",
'dhcpd.conf' => "/etc/dhcp/dhcpd.conf",
'dnf.conf' => "/etc/dnf/dnf.conf",
'firewalld.conf' => "/etc/firewalld/firewalld.conf",
@ -1175,6 +1191,7 @@ sub _set_paths
nmcli => "/bin/nmcli",
openssl => "/usr/bin/openssl",
passwd => "/usr/bin/passwd",
pcs => "/usr/sbin/pcs",
ping => "/usr/bin/ping",
pgrep => "/usr/bin/pgrep",
ps => "/usr/bin/ps",

@ -0,0 +1,102 @@
package Anvil::Tools::Cluster;
#
# This module contains methods related to Pacemaker/pcs and clustering functions in general.
#
use strict;
use warnings;
use Scalar::Util qw(weaken isweak);
use Data::Dumper;
our $VERSION = "3.0.0";
my $THIS_FILE = "Cluster.pm";
### Methods;
# get_peer
=pod
=encoding utf8
=head1 NAME
Anvil::Tools::Cluster
Provides all methods related to clustering specifically (pacemaker, pcs, etc).
=head1 SYNOPSIS
use Anvil::Tools;
# Get a common object handle on all Anvil::Tools modules.
my $anvil = Anvil::Tools->new();
# Access to methods using '$anvil->Cluster->X'.
#
=head1 METHODS
Methods in this module;
=cut
sub new
{
my $class = shift;
my $self = {};
bless $self, $class;
return ($self);
}
# Get a handle on the Anvil::Tools object. I know that technically that is a sibling module, but it makes more
# sense in this case to think of it as a parent.
sub parent
{
my $self = shift;
my $parent = shift;
$self->{HANDLE}{TOOLS} = $parent if $parent;
# Defend against memory leads. See Scalar::Util'.
if (not isweak($self->{HANDLE}{TOOLS}))
{
weaken($self->{HANDLE}{TOOLS});
}
return ($self->{HANDLE}{TOOLS});
}
#############################################################################################################
# Public methods #
#############################################################################################################
=head2 get_peer
This method will return the peer's host name, B<< if >> this host is itself a node in a cluster.
=cut
sub get_peer
{
my $self = shift;
my $parameter = shift;
my $anvil = $self->parent;
my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->_test_access()" }});
my $peer_host_name = "";
return($peer_host_name);
}
# =head3
#
# Private Functions;
#
# =cut
#############################################################################################################
# Private functions #
#############################################################################################################

@ -178,7 +178,7 @@ sub domain_name
### TODO: Add a 'strict' parameter to control this) and/or support domain_private_tld
my %options = (domain_allow_underscore => 1, domain_disable_tld_validation => 1);
my $dvd = Data::Validate::Domain->new(%options);
my $test = $dvd->domain($name);
my $test = $dvd->is_domain($name);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { test => $test }});
if (not $test)
{

@ -253,11 +253,9 @@ systemctl enable pcsd.service
systemctl disable libvirtd.service
systemctl stop libvirtd.service
==== One node
pcs cluster auth el8-a01n01 el8-a01n02
# Username: hacluster
# Password:
pcs host auth el8-a01n01 el8-a01n02 -u hacluster -p "secret"
pcs cluster setup --name m3-anvil-01 m3-a01n01 m3-a01n02
pcs cluster setup m3-anvil-01 m3-a01n01 m3-a01n02
pcs cluster start --all
pcs stonith create virsh_node1 fence_virsh pcmk_host_list="m3-a01n01" ipaddr="192.168.122.1" passwd="secret" login="root" delay="15" port="m3-a01n01" op monitor interval="60"
pcs stonith create virsh_node2 fence_virsh pcmk_host_list="m3-a01n02" ipaddr="192.168.122.1" passwd="secret" login="root" port="m3-a01n02" op monitor interval="60"

@ -336,6 +336,12 @@ Failure! The return code: [#!variable!return_code!#] was received ('0' was expec
<key name="job_0094">Enabled and started the daemon: [#!variable!daemon!#].</key>
<key name="job_0095">Disable and stop the daemon: [#!variable!daemon!#].</key>
<key name="job_0096">This is a DR host, skipping pacemaker configuration.</key>
<key name="job_0097"><![CDATA[[ Error ] - Authorizing against 'pcsd' failed. Will try again in a moment in case we're waiting for the peer node to set its 'hacluster' shell user password.]]></key>
<key name="job_0098">Successfully authorized using 'pcsd' on both nodes.</key>
<key name="job_0099">No existing cluster found, will run initial setup.</key>
<key name="job_0100">The corosync.conf file does not exist locally, but it does exist on the peer. Copying the file to here.</key>
<key name="job_0101"><![CDATA[[ Error ] - Something went wrong while trying to initialize the cluster. The error, if any, was: [#!variable!error!#].]]></key>
<key name="job_0102">Starting the cluster now...</key>
<!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key>

@ -362,6 +362,8 @@ sub handle_periodic_tasks
# Check that the users we care about have ssh public keys and they're recorded in ssh_keys.
$anvil->System->check_ssh_keys({debug => 3});
$anvil->System->update_hosts({debug => 3});
# Check if the files on disk have changed. Even if it is time to check, don't if a job is
# running.
if ((not $anvil->data->{timing}{jobs_running}) && ($anvil->Storage->check_md5sums))
@ -486,122 +488,6 @@ sub handle_periodic_tasks
return(0);
}
# Get a list of machine host keys and user public keys from other machines.
sub get_other_keys
{
my ($anvil) = @_;
delete $anvil->data->{peers}{ssh_keys};
# Get the machine keys for other hosts.
my $query = "
SELECT
host_uuid,
host_name,
host_key
FROM
hosts
WHERE
host_uuid != ".$anvil->Database->quote($anvil->Get->host_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $host_uuid = $row->[0];
my $host_name = $row->[1];
my $host_key = $row->[2];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
host_uuid => $host_uuid,
host_name => $host_name,
host_key => $host_key,
}});
$anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$host_name} = $host_key;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"peers::ssh_keys::${host_uuid}::host::${host_name}" => $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$host_name},
}});
# If the host name is the long host name, create another entry with the short name.
if ($host_name =~ /^(.*?)\./)
{
my $short_host_name = $1;
$anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$short_host_name} = $host_key;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"peers::ssh_keys::${host_uuid}::host::${short_host_name}" => $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$short_host_name},
}});
}
# Find any IP addresses for this host.
my $query = "SELECT ip_address_address FROM ip_addresses WHERE ip_address_host_uuid = ".$anvil->Database->quote($host_uuid)." AND ip_address_note != 'DELETED';";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }});
my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
my $count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $ip_address_address = $row->[0];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
ip_address_address => $ip_address_address,
}});
$anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$ip_address_address} = $host_key;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"peers::ssh_keys::${host_uuid}::host::${ip_address_address}" => $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$ip_address_address},
}});
}
}
# Now read in the public key for other users on other machines.
$query = "
SELECT
ssh_key_host_uuid,
ssh_key_user_name,
ssh_key_public_key
FROM
ssh_keys
WHERE
ssh_key_host_uuid != ".$anvil->Database->quote($anvil->Get->host_uuid)."
;";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }});
$results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__});
$count = @{$results};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
results => $results,
count => $count,
}});
foreach my $row (@{$results})
{
my $ssh_key_host_uuid = $row->[0];
my $ssh_key_user_name = $row->[1];
my $ssh_key_public_key = $row->[2];
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
ssh_key_host_uuid => $ssh_key_host_uuid,
ssh_key_user_name => $ssh_key_user_name,
ssh_key_public_key => $ssh_key_public_key,
}});
$anvil->data->{peers}{ssh_keys}{$ssh_key_host_uuid}{user}{$ssh_key_user_name} = $ssh_key_public_key;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
"peers::ssh_keys::${ssh_key_host_uuid}::user::${ssh_key_user_name}" => $anvil->data->{peers}{ssh_keys}{$ssh_key_host_uuid}{user}{$ssh_key_user_name},
}});
}
return(0);
}
# This calls striker-manage-install-target to see if the dhcpd is running or not. If it is or isn't, the config
# variable 'install-target::enabled' is set/updated. On non-Striker hosts, this simply returns without doing
# anything.

@ -11,12 +11,14 @@
# 5 = Problem parsing job data or loading manifest or anvil data using job data.
#
# TODO:
#
# -
#
use strict;
use warnings;
use Anvil::Tools;
use Data::Dumper;
use String::ShellQuote;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
@ -72,17 +74,37 @@ sub configure_pacemaker
my $manifest_uuid = $anvil->data->{sys}{manifest_uuid};
### TODO: Move these to variables in the 'sys' hash
my $anvil_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{name};
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $host_name = $anvil->data->{sys}{host_name};
my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
my $anvil_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{name};
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $host_name = $anvil->data->{sys}{host_name};
my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
my $node1_host_uuid = $anvil->data->{sys}{node1_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name};
$node1_host_name =~ s/\..*$//;
my $node2_host_uuid = $anvil->data->{sys}{node2_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name};
$node2_host_name =~ s/\..*$//;
my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name;
my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
my $escaped_password = shell_quote($new_password);
my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." -u hacluster -p ".$escaped_password;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
}});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:machine' => $machine,
's1:anvil_name' => $anvil_name,
's3:host_name' => $host_name,
's4:manifest_uuid' => $manifest_uuid,
's5:anvil_uuid' => $anvil_uuid,
's6:new_password' => $anvil->Log->is_secure($new_password),
machine => $machine,
anvil_uuid => $anvil_uuid,
anvil_name => $anvil_name,
host_name => $host_name,
manifest_uuid => $manifest_uuid,
node1_host_uuid => $node1_host_uuid,
node1_host_name => $node1_host_name,
node2_host_uuid => $node2_host_uuid,
node2_host_name => $node2_host_name,
peer_host_name => $peer_host_name,
peer_host_uuid => $peer_host_uuid,
new_password => $anvil->Log->is_secure($new_password),
escaped_password => $anvil->Log->is_secure($escaped_password),
auth_shell_call => $anvil->Log->is_secure($auth_shell_call),
}});
# If this is a DR box, we don't use pacemaker.
@ -126,14 +148,134 @@ sub configure_pacemaker
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0095,!!daemon!libvirtd!!");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0095", variables => { daemon => "libvirtd" }});
# If there is no corosync.conf, see if the peer has it. If so, copy it. If not, we'll initialize the
# cluster shortly.
if (not -e $anvil->data->{path}{configs}{'corosync.conf'})
{
my $cluster_conf = $anvil->Storeage->read_file({
file => $anvil->data->{path}{configs}{'corosync.conf'},
target => $peer_host_name,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_conf => $cluster_conf }});
if ($cluster_conf ne "!!error!!")
{
# Write the file out.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0100"});
$anvil->Storage->write_file({
body => $cluster_conf,
file => $anvil->data->{path}{configs}{'corosync.conf'},
user => "root",
group => "root",
mode => "0644",
});
}
}
### Run on node 1 only.
if ($machine eq "node2")
{
# We loop until the peer finishes.
# We loop until the peer finishes or the peer's job hit's 100.
}
else
{
# Proceed with cluster setup.
my $waiting = 1;
my $warning_printed = 0;
while($waiting)
{
my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => {
output => $output,
return_code => $return_code,
}});
if ($return_code)
{
# Something went wrong.
if (not $warning_printed)
{
# Update the job
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0097");
$warning_printed = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { warning_printed => $warning_printed }});
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0097"});
sleep 5;
}
else
{
# We're good.
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0098");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0098"});
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
}
}
# If there is no corosync.conf, see if the peer has it. If so, copy it. If not, initialize
# the cluster.
if (not -e $anvil->data->{path}{configs}{'corosync.conf'})
{
# There's no cluster yet, initialize it.
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0099,!!anvil_name!".$anvil_name."!!");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0099", variables => { anvil_name => $anvil_name }});
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster setup ".$anvil_name." ".$node1_host_name." ".$node2_host_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => {
output => $output,
return_code => $return_code,
}});
if ($return_code)
{
# Something went wrong
update_progress($anvil, 100, "job_0101,!!error!".$output."!!");
sleep 2;
$anvil->nice_exit({exit_code => 5});
}
}
# Now, if we can read the CIB, see where the setup is. If not, start by setting up the
# cluster.
my $cib_data = "";
my $cluster_started = 0;
until ($cib_data)
{
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }});
($cib_data, my $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => {
cib_data => $cib_data,
return_code => $return_code,
}});
if ($return_code)
{
if (not $cluster_started)
{
# Start the cluster.
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0102");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0102"});
$cluster_started = 1;
my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster start --all";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => {
cluster_started => $cluster_started,
shell_call => $shell_call,
}});
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => {
output => $output,
return_code => $return_code,
}});
}
}
die;
}
die;
}
=cut
$anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed};
@ -896,7 +1038,7 @@ sub check_local_network
{
# It's fine
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0087,!!interface!".$in_iface."!!,!!mtu!".$mtu."!!");
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0087", variables => {
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "job_0087", variables => {
interface => $in_iface,
mtu => $mtu,
}});
@ -1037,7 +1179,10 @@ sub check_local_network
### TODO: Do we really need passwordless SSH anymore?
# Configure SSH by adding ours and our peer's SSH keys to ~/.ssh/known_hosts
$anvil->System->check_ssh_keys({debug => 2});
$anvil->System->check_ssh_keys({debug => 3});
# Update the hosts file.
$anvil->System->update_hosts({debug => 3});
# Setup IPMI, if needed.
### TODO: Do this when on real hardware

@ -5,6 +5,7 @@ use strict;
use warnings;
use Anvil::Tools;
use Data::Dumper;
use String::ShellQuote;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
@ -24,4 +25,4 @@ print "Connecting to the database(s);\n";
$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"});
$anvil->System->update_hosts({debug => 3});
$anvil->System->parse_corosync_conf({debug => 2});

Loading…
Cancel
Save