Added loading of the existing corosync authkey (/etc/corosync/authkey) from the peer during a rebuild.

Signed-off-by: digimer <mkelly@alteeve.ca>
main
digimer 8 months ago
parent ec6acdd6d8
commit e00dec7cba
  1. 1
      Anvil/Tools.pm
  2. 1
      share/words.xml
  3. 9
      tools/anvil-daemon
  4. 35
      tools/anvil-join-anvil

@ -1061,6 +1061,7 @@ sub _set_paths
'autoindex.conf' => "/etc/httpd/conf.d/autoindex.conf", 'autoindex.conf' => "/etc/httpd/conf.d/autoindex.conf",
'cib.xml' => "/var/lib/pacemaker/cib/cib.xml", 'cib.xml' => "/var/lib/pacemaker/cib/cib.xml",
'corosync.conf' => "/etc/corosync/corosync.conf", 'corosync.conf' => "/etc/corosync/corosync.conf",
'corosync-authkey' => "/etc/corosync/authkey",
'dhcpd.conf' => "/etc/dhcp/dhcpd.conf", 'dhcpd.conf' => "/etc/dhcp/dhcpd.conf",
'dnf.conf' => "/etc/dnf/dnf.conf", 'dnf.conf' => "/etc/dnf/dnf.conf",
'drbd-proxy.license' => "/etc/drbd-proxy.license", 'drbd-proxy.license' => "/etc/drbd-proxy.license",

@ -1765,6 +1765,7 @@ Note: This is a permanent action! If you protect this server again later, a full
<key name="job_0482">Waiting for the peer's hostname to be: [#!variable!peer_name!#] in the database.</key> <key name="job_0482">Waiting for the peer's hostname to be: [#!variable!peer_name!#] in the database.</key>
<key name="job_0483">The peer's hostname is: [#!variable!peer_name!#], proceeding.</key> <key name="job_0483">The peer's hostname is: [#!variable!peer_name!#], proceeding.</key>
<key name="job_0484">The peer's hostname is currently: [#!variable!old_peer_name!#], waiting for it to be changed to: [#!variable!peer_name!#]... Will check again shortly.</key> <key name="job_0484">The peer's hostname is currently: [#!variable!old_peer_name!#], waiting for it to be changed to: [#!variable!peer_name!#]... Will check again shortly.</key>
<key name="job_0485">The corosync authkey file does not exist locally, but it does exist on the peer. Copying the file to here.</key>
<!-- Log entries --> <!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key> <key name="log_0001">Starting: [#!variable!program!#].</key>

@ -1617,7 +1617,7 @@ sub run_jobs
my $job_status = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_status}; my $job_status = $anvil->data->{jobs}{modified_date}{$modified_date}{job_uuid}{$job_uuid}{job_status};
my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0; my $started_seconds_ago = $job_picked_up_at ? (time - $job_picked_up_at) : 0;
my $updated_seconds_ago = $job_updated ? (time - $job_updated) : 0; my $updated_seconds_ago = $job_updated ? (time - $job_updated) : 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
's01:job_uuid' => $job_uuid, 's01:job_uuid' => $job_uuid,
's02:job_command' => $job_command, 's02:job_command' => $job_command,
's03:short_command' => $short_command, 's03:short_command' => $short_command,
@ -1821,21 +1821,22 @@ sub run_jobs
{ {
# Is the job_uuid associated with this command done? # Is the job_uuid associated with this command done?
my $started_job_uuid = $anvil->data->{jobs_started}{$short_command}; my $started_job_uuid = $anvil->data->{jobs_started}{$short_command};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { started_job_uuid => $started_job_uuid }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { started_job_uuid => $started_job_uuid }});
if (exists $anvil->data->{jobs}{running}{$started_job_uuid}) if (exists $anvil->data->{jobs}{running}{$started_job_uuid})
{ {
# If the previously running job and this job have the same # If the previously running job and this job have the same
# UUID, it failed and needs to restart. # UUID, it failed and needs to restart.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => {
job_uuid => $job_uuid, job_uuid => $job_uuid,
started_job_uuid => $started_job_uuid, started_job_uuid => $started_job_uuid,
"jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress}, "jobs::running::${started_job_uuid}::job_progress" => $anvil->data->{jobs}{running}{$started_job_uuid}{job_progress},
}}); }});
die if not $job_uuid;
if ($started_job_uuid eq $job_uuid) if ($started_job_uuid eq $job_uuid)
{ {
# We're restarting. # We're restarting.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0741", variables => { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0742", variables => {
command => $job_command, command => $job_command,
job_uuid => $job_uuid, job_uuid => $job_uuid,
}}); }});

@ -59,7 +59,7 @@ if (not $anvil->data->{sys}{database}{connections})
} }
# If there isn't a job UUID, see if the user is asking to rejoin this subnode to a cluster. # If there isn't a job UUID, see if the user is asking to rejoin this subnode to a cluster.
if (not $anvil->data->{switches}{rejoin}) if ($anvil->data->{switches}{rejoin})
{ {
process_rejoin($anvil); process_rejoin($anvil);
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});
@ -841,6 +841,30 @@ sub configure_pacemaker
return(0); return(0);
} }
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 1, list => { 'path::configs::corosync-authkey' => $anvil->data->{path}{configs}{'corosync-authkey'} }});
if (not -f $anvil->data->{path}{configs}{'corosync-authkey'})
{
# In case we're rebuilding, see if the peer already has the '/etc/corosync/authkey' file.
my $corosync_authkey = $anvil->Storage->read_file({
file => $anvil->data->{path}{configs}{'corosync-authkey'},
target => $peer_host_name,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 1, list => { corosync_authkey => $corosync_authkey }});
if ($corosync_authkey ne "!!error!!")
{
# Write the file out.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0485"});
$anvil->Storage->write_file({
debug => 2,
body => $corosync_authkey,
file => $anvil->data->{path}{configs}{'corosync-authkey'},
user => "root",
group => "root",
mode => "0400",
});
}
}
### Run on both nodes. ### Run on both nodes.
# Enable pcsd and start the pcsd daemon. # Enable pcsd and start the pcsd daemon.
my ($return_code) = $anvil->System->enable_daemon({daemon => "pcsd.service"}); my ($return_code) = $anvil->System->enable_daemon({daemon => "pcsd.service"});
@ -887,6 +911,7 @@ sub configure_pacemaker
# If there is no corosync.conf, see if the peer has it. If so, copy it. If not, we'll initialize the # If there is no corosync.conf, see if the peer has it. If so, copy it. If not, we'll initialize the
# cluster shortly. # cluster shortly.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, secure => 1, list => { 'path::configs::corosync.conf' => $anvil->data->{path}{configs}{'corosync.conf'} }});
if (not -e $anvil->data->{path}{configs}{'corosync.conf'}) if (not -e $anvil->data->{path}{configs}{'corosync.conf'})
{ {
my $corosync_conf = $anvil->Storage->read_file({ my $corosync_conf = $anvil->Storage->read_file({
@ -907,6 +932,13 @@ sub configure_pacemaker
mode => "0644", mode => "0644",
}); });
} }
# Restart corosync
my ($return_code) = $anvil->System->restart_daemon({daemon => "corosync.service"});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { return_code => $return_code }});
$return_code = undef;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0095", variables => { daemon => "corosync.service" }});
update_progress($anvil, ($anvil->data->{job}{progress} += 2), "job_0095,!!daemon!corosync.service!!");
} }
# Node 1 initializes, node 2 waits. # Node 1 initializes, node 2 waits.
@ -990,6 +1022,7 @@ sub configure_pacemaker
} }
if (not $both_online) if (not $both_online)
{ {
### TODO: Left off here... In case of rebuilding, we'll never get past this, so we need to try 'pcs cluster start' once.
my $delay = 5; my $delay = 5;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0816", variables => { delay => $delay }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0816", variables => { delay => $delay }});
sleep $delay; sleep $delay;

Loading…
Cancel
Save