diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index d9d2b30c..8a8d50f1 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -41,6 +41,7 @@ binmode(STDOUT, ':encoding(utf-8)'); # methods via their containing module's name. (A La: C<< $anvil->Module->method >> rather than C<< $anvil->method >>). use Anvil::Tools::Account; use Anvil::Tools::Alert; +use Anvil::Tools::Cluster; use Anvil::Tools::Convert; use Anvil::Tools::Database; use Anvil::Tools::DRBD; @@ -123,6 +124,7 @@ sub new HANDLE => { ACCOUNT => Anvil::Tools::Account->new(), ALERT => Anvil::Tools::Alert->new(), + CLUSTER => Anvil::Tools::Cluster->new(), CONVERT => Anvil::Tools::Convert->new(), DATABASE => Anvil::Tools::Database->new(), DRBD => Anvil::Tools::DRBD->new(), @@ -165,6 +167,7 @@ sub new # Get a handle on the various submodules $anvil->Account->parent($anvil); $anvil->Alert->parent($anvil); + $anvil->Cluster->parent($anvil); $anvil->Convert->parent($anvil); $anvil->Database->parent($anvil); $anvil->DRBD->parent($anvil); @@ -469,6 +472,18 @@ sub Alert return ($self->{HANDLE}{ALERT}); } +=head2 Cluster + +Access the C<Cluster.pm> methods via 'C<< $anvil->Cluster->method >>'. + +=cut +sub Cluster +{ + my $self = shift; + + return ($self->{HANDLE}{CLUSTER}); +} + =head2 Convert Access the C methods via 'C<< $anvil->Convert->method >>'. 
@@ -1056,6 +1071,7 @@ sub _set_paths 'anvil.conf' => "/etc/anvil/anvil.conf", 'anvil.version' => "/etc/anvil/anvil.version", 'autoindex.conf' => "/etc/httpd/conf.d/autoindex.conf", + 'corosync.conf' => "/etc/corosync/corosync.conf", 'dhcpd.conf' => "/etc/dhcp/dhcpd.conf", 'dnf.conf' => "/etc/dnf/dnf.conf", 'firewalld.conf' => "/etc/firewalld/firewalld.conf", @@ -1175,6 +1191,7 @@ sub _set_paths nmcli => "/bin/nmcli", openssl => "/usr/bin/openssl", passwd => "/usr/bin/passwd", + pcs => "/usr/sbin/pcs", ping => "/usr/bin/ping", pgrep => "/usr/bin/pgrep", ps => "/usr/bin/ps", diff --git a/Anvil/Tools/Cluster.pm b/Anvil/Tools/Cluster.pm new file mode 100644 index 00000000..f752347a --- /dev/null +++ b/Anvil/Tools/Cluster.pm @@ -0,0 +1,102 @@ +package Anvil::Tools::Cluster; +# +# This module contains methods related to Pacemaker/pcs and clustering functions in general. +# + +use strict; +use warnings; +use Scalar::Util qw(weaken isweak); +use Data::Dumper; + +our $VERSION = "3.0.0"; +my $THIS_FILE = "Cluster.pm"; + +### Methods; +# get_peer + +=pod + +=encoding utf8 + +=head1 NAME + +Anvil::Tools::Cluster + +Provides all methods related to clustering specifically (pacemaker, pcs, etc). + +=head1 SYNOPSIS + + use Anvil::Tools; + + # Get a common object handle on all Anvil::Tools modules. + my $anvil = Anvil::Tools->new(); + + # Access to methods using '$anvil->Cluster->X'. + # + +=head1 METHODS + +Methods in this module; + +=cut +sub new +{ + my $class = shift; + my $self = {}; + + bless $self, $class; + + return ($self); +} + +# Get a handle on the Anvil::Tools object. I know that technically that is a sibling module, but it makes more +# sense in this case to think of it as a parent. +sub parent +{ + my $self = shift; + my $parent = shift; + + $self->{HANDLE}{TOOLS} = $parent if $parent; + + # Defend against memory leaks. See 'Scalar::Util'. 
+ if (not isweak($self->{HANDLE}{TOOLS})) + { + weaken($self->{HANDLE}{TOOLS}); + } + + return ($self->{HANDLE}{TOOLS}); +} + + +############################################################################################################# +# Public methods # +############################################################################################################# + +=head2 get_peer + +This method will return the peer's host name, B<< if >> this host is itself a node in a cluster. NOTE: This is not yet implemented; it currently always returns an empty string. + +=cut +sub get_peer +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->get_peer()" }}); + + my $peer_host_name = ""; + + + return($peer_host_name); +} + +# =head3 +# +# Private Functions; +# +# =cut + +############################################################################################################# +# Private functions # +############################################################################################################# diff --git a/Anvil/Tools/Validate.pm b/Anvil/Tools/Validate.pm index 4642bb5d..26bf8356 100644 --- a/Anvil/Tools/Validate.pm +++ b/Anvil/Tools/Validate.pm @@ -178,7 +178,7 @@ sub domain_name ### TODO: Add a 'strict' parameter to control this) and/or support domain_private_tld my %options = (domain_allow_underscore => 1, domain_disable_tld_validation => 1); my $dvd = Data::Validate::Domain->new(%options); - my $test = $dvd->domain($name); + my $test = $dvd->is_domain($name); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { test => $test }}); if (not $test) { diff --git a/notes b/notes index 18112087..94d0d707 100644 --- a/notes +++ b/notes @@ -253,11 +253,9 @@ systemctl enable pcsd.service systemctl disable libvirtd.service systemctl stop libvirtd.service ==== One node -pcs 
cluster auth el8-a01n01 el8-a01n02 -# Username: hacluster -# Password: +pcs host auth el8-a01n01 el8-a01n02 -u hacluster -p "secret" -pcs cluster setup --name m3-anvil-01 m3-a01n01 m3-a01n02 +pcs cluster setup m3-anvil-01 m3-a01n01 m3-a01n02 pcs cluster start --all pcs stonith create virsh_node1 fence_virsh pcmk_host_list="m3-a01n01" ipaddr="192.168.122.1" passwd="secret" login="root" delay="15" port="m3-a01n01" op monitor interval="60" pcs stonith create virsh_node2 fence_virsh pcmk_host_list="m3-a01n02" ipaddr="192.168.122.1" passwd="secret" login="root" port="m3-a01n02" op monitor interval="60" diff --git a/share/words.xml b/share/words.xml index 428d987f..f6cb674f 100644 --- a/share/words.xml +++ b/share/words.xml @@ -336,6 +336,12 @@ Failure! The return code: [#!variable!return_code!#] was received ('0' was expec Enabled and started the daemon: [#!variable!daemon!#]. Disable and stop the daemon: [#!variable!daemon!#]. This is a DR host, skipping pacemaker configuration. + + Successfully authorized using 'pcsd' on both nodes. + No existing cluster found, will run initial setup. + The corosync.conf file does not exist locally, but it does exist on the peer. Copying the file to here. + + Starting the cluster now... Starting: [#!variable!program!#]. diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 4f3d0350..3547b5c2 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -362,6 +362,8 @@ sub handle_periodic_tasks # Check that the users we care about have ssh public keys and they're recorded in ssh_keys. $anvil->System->check_ssh_keys({debug => 3}); + $anvil->System->update_hosts({debug => 3}); + # Check if the files on disk have changed. Even if it is time to check, don't if a job is # running. if ((not $anvil->data->{timing}{jobs_running}) && ($anvil->Storage->check_md5sums)) @@ -486,122 +488,6 @@ sub handle_periodic_tasks return(0); } -# Get a list of machine host keys and user public keys from other machines. 
-sub get_other_keys -{ - my ($anvil) = @_; - - delete $anvil->data->{peers}{ssh_keys}; - - # Get the machine keys for other hosts. - my $query = " -SELECT - host_uuid, - host_name, - host_key -FROM - hosts -WHERE - host_uuid != ".$anvil->Database->quote($anvil->Get->host_uuid)." -;"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }}); - - my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); - my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - results => $results, - count => $count, - }}); - foreach my $row (@{$results}) - { - my $host_uuid = $row->[0]; - my $host_name = $row->[1]; - my $host_key = $row->[2]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - host_uuid => $host_uuid, - host_name => $host_name, - host_key => $host_key, - }}); - - $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$host_name} = $host_key; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "peers::ssh_keys::${host_uuid}::host::${host_name}" => $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$host_name}, - }}); - - # If the host name is the long host name, create another entry with the short name. - if ($host_name =~ /^(.*?)\./) - { - my $short_host_name = $1; - $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$short_host_name} = $host_key; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "peers::ssh_keys::${host_uuid}::host::${short_host_name}" => $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$short_host_name}, - }}); - } - - # Find any IP addresses for this host. - my $query = "SELECT ip_address_address FROM ip_addresses WHERE ip_address_host_uuid = ".$anvil->Database->quote($host_uuid)." 
AND ip_address_note != 'DELETED';"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }}); - - my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); - my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - results => $results, - count => $count, - }}); - foreach my $row (@{$results}) - { - my $ip_address_address = $row->[0]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - ip_address_address => $ip_address_address, - }}); - - $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$ip_address_address} = $host_key; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "peers::ssh_keys::${host_uuid}::host::${ip_address_address}" => $anvil->data->{peers}{ssh_keys}{$host_uuid}{host}{$ip_address_address}, - }}); - } - - } - - # Now read in the public key for other users on other machines. - $query = " -SELECT - ssh_key_host_uuid, - ssh_key_user_name, - ssh_key_public_key -FROM - ssh_keys -WHERE - ssh_key_host_uuid != ".$anvil->Database->quote($anvil->Get->host_uuid)." 
-;"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { query => $query }}); - - $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); - $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - results => $results, - count => $count, - }}); - foreach my $row (@{$results}) - { - my $ssh_key_host_uuid = $row->[0]; - my $ssh_key_user_name = $row->[1]; - my $ssh_key_public_key = $row->[2]; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - ssh_key_host_uuid => $ssh_key_host_uuid, - ssh_key_user_name => $ssh_key_user_name, - ssh_key_public_key => $ssh_key_public_key, - }}); - - $anvil->data->{peers}{ssh_keys}{$ssh_key_host_uuid}{user}{$ssh_key_user_name} = $ssh_key_public_key; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { - "peers::ssh_keys::${ssh_key_host_uuid}::user::${ssh_key_user_name}" => $anvil->data->{peers}{ssh_keys}{$ssh_key_host_uuid}{user}{$ssh_key_user_name}, - }}); - } - - return(0); -} - # This calls striker-manage-install-target to see if the dhcpd is running or not. If it is or isn't, the config # variable 'install-target::enabled' is set/updated. On non-Striker hosts, this simply returns without doing # anything. diff --git a/tools/anvil-join-anvil b/tools/anvil-join-anvil index 6cc9bde5..c88dffb8 100755 --- a/tools/anvil-join-anvil +++ b/tools/anvil-join-anvil @@ -11,12 +11,14 @@ # 5 = Problem parsing job data or loading manifest or anvil data using job data. 
# # TODO: -# +# - +# use strict; use warnings; use Anvil::Tools; use Data::Dumper; +use String::ShellQuote; my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; @@ -72,17 +74,37 @@ sub configure_pacemaker my $manifest_uuid = $anvil->data->{sys}{manifest_uuid}; ### TODO: Move these to variables in the 'sys' hash - my $anvil_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{name}; - my $anvil_uuid = $anvil->data->{sys}{anvil_uuid}; - my $host_name = $anvil->data->{sys}{host_name}; - my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $anvil_name = $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}{name}; + my $anvil_uuid = $anvil->data->{sys}{anvil_uuid}; + my $host_name = $anvil->data->{sys}{host_name}; + my $new_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $node1_host_uuid = $anvil->data->{sys}{node1_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name}; + $node1_host_name =~ s/\..*$//; + my $node2_host_uuid = $anvil->data->{sys}{node2_host_uuid} = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name}; + $node2_host_name =~ s/\..*$//; + my $peer_host_name = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_name : $node1_host_name; + my $peer_host_uuid = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid; + my $escaped_password = shell_quote($new_password); + my $auth_shell_call = $anvil->data->{path}{exe}{pcs}." host auth ".$node1_host_name." ".$node2_host_name." 
-u hacluster -p ".$escaped_password; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - 's1:machine' => $machine, - 's1:anvil_name' => $anvil_name, - 's3:host_name' => $host_name, - 's4:manifest_uuid' => $manifest_uuid, - 's5:anvil_uuid' => $anvil_uuid, - 's6:new_password' => $anvil->Log->is_secure($new_password), + machine => $machine, + anvil_uuid => $anvil_uuid, + anvil_name => $anvil_name, + host_name => $host_name, + manifest_uuid => $manifest_uuid, + node1_host_uuid => $node1_host_uuid, + node1_host_name => $node1_host_name, + node2_host_uuid => $node2_host_uuid, + node2_host_name => $node2_host_name, + peer_host_name => $peer_host_name, + peer_host_uuid => $peer_host_uuid, + new_password => $anvil->Log->is_secure($new_password), + escaped_password => $anvil->Log->is_secure($escaped_password), + auth_shell_call => $anvil->Log->is_secure($auth_shell_call), }}); # If this is a DR box, we don't use pacemaker. @@ -126,14 +148,134 @@ sub configure_pacemaker update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0095,!!daemon!libvirtd!!"); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0095", variables => { daemon => "libvirtd" }}); + # If there is no corosync.conf, see if the peer has it. If so, copy it. If not, we'll initialize the + # cluster shortly. + if (not -e $anvil->data->{path}{configs}{'corosync.conf'}) + { + my $cluster_conf = $anvil->Storage->read_file({ + file => $anvil->data->{path}{configs}{'corosync.conf'}, + target => $peer_host_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { cluster_conf => $cluster_conf }}); + if ($cluster_conf ne "!!error!!") + { + # Write the file out. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0100"}); + $anvil->Storage->write_file({ + body => $cluster_conf, + file => $anvil->data->{path}{configs}{'corosync.conf'}, + user => "root", + group => "root", + mode => "0644", + }); + } + } + ### Run on node 1 only. if ($machine eq "node2") { - # We loop until the peer finishes. + # We loop until the peer finishes or the peer's job hits 100. } else { # Proceed with cluster setup. + + my $waiting = 1; + my $warning_printed = 0; + while($waiting) + { + my ($output, $return_code) = $anvil->System->call({debug => 3, secure => 1, shell_call => $auth_shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code) + { + # Something went wrong. + if (not $warning_printed) + { + # Update the job + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0097"); + $warning_printed = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { warning_printed => $warning_printed }}); + } + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0097"}); + sleep 5; + } + else + { + # We're good. + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0098"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0098"}); + $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + } + } + + # If there is no corosync.conf, see if the peer has it. If so, copy it. If not, initialize + # the cluster. + if (not -e $anvil->data->{path}{configs}{'corosync.conf'}) + { + # There's no cluster yet, initialize it. 
+ update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0099,!!anvil_name!".$anvil_name."!!"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0099", variables => { anvil_name => $anvil_name }}); + + my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster setup ".$anvil_name." ".$node1_host_name." ".$node2_host_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + output => $output, + return_code => $return_code, + }}); + if ($return_code) + { + # Something went wrong + update_progress($anvil, 100, "job_0101,!!error!".$output."!!"); + sleep 2; + $anvil->nice_exit({exit_code => 5}); + } + } + + # Now, if we can read the CIB, see where the setup is. If not, start by setting up the + # cluster. + my $cib_data = ""; + my $cluster_started = 0; + until ($cib_data) + { + my $shell_call = $anvil->data->{path}{exe}{pcs}." cluster cib"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { shell_call => $shell_call }}); + + ($cib_data, my $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + cib_data => $cib_data, + return_code => $return_code, + }}); + if ($return_code) + { + if (not $cluster_started) + { + # Start the cluster. + update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0102"); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0102"}); + + $cluster_started = 1; + my $shell_call = $anvil->data->{path}{exe}{pcs}." 
cluster start --all"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + cluster_started => $cluster_started, + shell_call => $shell_call, + }}); + + my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, list => { + output => $output, + return_code => $return_code, + }}); + } + } + die; + } + die; } =cut $anvil->data->{manifests}{manifest_uuid}{$manifest_uuid}{parsed}; @@ -896,7 +1038,7 @@ sub check_local_network { # It's fine update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0087,!!interface!".$in_iface."!!,!!mtu!".$mtu."!!"); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0087", variables => { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, key => "job_0087", variables => { interface => $in_iface, mtu => $mtu, }}); @@ -1037,7 +1179,10 @@ sub check_local_network ### TODO: Do we really need passwordless SSH anymore? # Configure SSH by adding ours and our peer's SSH keys to ~/.ssh/known_hosts - $anvil->System->check_ssh_keys({debug => 2}); + $anvil->System->check_ssh_keys({debug => 3}); + + # Update the hosts file. + $anvil->System->update_hosts({debug => 3}); # Setup IPMI, if needed. 
### TODO: Do this when on real hardware diff --git a/tools/test.pl b/tools/test.pl index c3fa6f03..d7cfdb37 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -5,6 +5,7 @@ use strict; use warnings; use Anvil::Tools; use Data::Dumper; +use String::ShellQuote; my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; @@ -24,4 +25,4 @@ print "Connecting to the database(s);\n"; $anvil->Database->connect(); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0132"}); -$anvil->System->update_hosts({debug => 3}); +$anvil->System->parse_corosync_conf({debug => 2});