You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
482 lines
19 KiB
482 lines
19 KiB
#!/usr/bin/perl
# 
# This program boots a server. It can be called as either a job from the webui or directly from another
# program or a terminal.
# 
# Exit codes;
# 0 = Normal exit.
# 1 = An error occurred; no database connection, no (valid) server was specified, this host is not a
#     supported type, this host is not in an Anvil! system, or the server could not be booted.
# 
# TODO: 
# - Add support for boot ordering.
# - Check which node we want to put on and set a location constraint to prefer that node before calling pcs.
# 
|
|
|
use strict;
use warnings;
use Anvil::Tools;

# Work out the program's name and the directory it was called from. If '$0' has no path component (for 
# example, when run as 'perl anvil-boot-server'), the first match fails and returns nothing, so we fall back
# to '$0' itself as the name and '.' as the directory rather than leaving these undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE    = $0 if not defined $THIS_FILE;

# The name is quoted so that any regex metacharacters in the file name are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory    = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();

# Read the command line switches.
$anvil->Get->switches({list => ["job-uuid", "no-wait", "server", "server-uuid", "wait"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try 
	# again after we exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

# If we were started as a job, the server to boot comes from the job's data rather than from switches.
if ($anvil->data->{switches}{'job-uuid'})
{
	# Load the job data.
	$anvil->Job->clear();
	$anvil->Job->get_job_details();
	$anvil->Job->update_progress({
		progress         => 1,
		job_picked_up_by => $$, 
		job_picked_up_at => time, 
		message          => "job_0282", 
	});
	
	# Pull out the job data. Each line is checked for 'server=<name>' and 'server-uuid=<uuid>', and any
	# value found overrides the matching switch.
	foreach my $line (split/\n/, $anvil->data->{jobs}{job_data})
	{
		if ($line =~ /server=(.*?)$/)
		{
			$anvil->data->{switches}{'server'} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				'switches::server' => $anvil->data->{switches}{'server'},
			}});
		}
		if ($line =~ /server-uuid=(.*?)$/)
		{
			$anvil->data->{switches}{'server-uuid'} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
			}});
		}
	}
}

# Now check that we have a server. If it's a server_uuid, read the server name.
if ($anvil->data->{switches}{'server-uuid'})
{
	# Convert the server_uuid to a server_name.
	my $query = "SELECT server_name FROM servers WHERE server_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'server-uuid'}).";";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
	
	# If no row comes back, the name is undefined, so it's normalized to an empty string.
	my $server_name = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
	   $server_name = "" if not defined $server_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_name => $server_name }});
	
	if ($server_name)
	{
		$anvil->data->{switches}{'server'} = $server_name;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			'switches::server' => $anvil->data->{switches}{'server'},
		}});
	}
	else
	{
		# Invalid server UUID.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0269", variables => { 
			server_uuid => $anvil->data->{switches}{'server-uuid'},
		}});
		$anvil->Job->update_progress({progress => 100, message => "error_0269,!!server_uuid!".$anvil->data->{switches}{'server-uuid'}."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
}

# Do we have a server name?
if (not $anvil->data->{switches}{'server'})
{
	# Unable to proceed.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0257"});
	$anvil->Job->update_progress({progress => 100, message => "error_0257"});
	$anvil->nice_exit({exit_code => 1});
}

# Are we a node or DR host?
$anvil->data->{sys}{host_type} = $anvil->Get->host_type();
if (($anvil->data->{sys}{host_type} ne "node") && ($anvil->data->{sys}{host_type} ne "dr"))
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0258"});
	$anvil->Job->update_progress({progress => 100, message => "error_0258"});
	$anvil->nice_exit({exit_code => 1});
}

### TODO: Add DR support. For now, this only works on Nodes in a cluster
if ($anvil->data->{sys}{host_type} eq "dr")
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0265"});
	$anvil->Job->update_progress({progress => 100, message => "error_0265"});
	$anvil->nice_exit({exit_code => 1});
}

# Make sure that we're in an Anvil! system.
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
if (not $anvil->data->{sys}{anvil_uuid})
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
	$anvil->Job->update_progress({progress => 100, message => "error_0260"});
	$anvil->nice_exit({exit_code => 1});
}

# Wait for pacemaker to be up.
wait_for_pacemaker($anvil);

# If 'server' is 'all', boot all servers.
if (lc($anvil->data->{switches}{'server'}) eq "all")
{
	boot_all_servers($anvil);
}
else
{
	# A single server boot waits for the boot to complete unless '--no-wait' was used.
	my $wait = $anvil->data->{switches}{'no-wait'} ? 0 : 1;
	boot_server($anvil, $anvil->data->{switches}{'server'}, $wait, 50);
}

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});
$anvil->Job->update_progress({progress => 100, message => "job_0281"});

$anvil->nice_exit({exit_code => 0});
|
|
|
|
|
############################################################################################################# |
|
# Functions # |
|
############################################################################################################# |
|
|
|
# This blocks until the parsed CIB reports the local node as ready. The CIB is re-parsed every ten seconds,
# and each pass reports one of three states to the log and to the job's progress; the cluster hasn't 
# started (5%), the node isn't ready yet (10%) or the node is ready (15%). There is no timeout. 
# 
# Takes the Anvil::Tools handle as its only argument and always returns '0'.
sub wait_for_pacemaker
{
	my ($anvil) = @_;
	
	my $waiting = 1;
	while ($waiting)
	{
		# Until proven otherwise, assume that the cluster hasn't started.
		my $message  = "job_0277";
		my $progress = 5;
		
		my $problem = $anvil->Cluster->parse_cib({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		if (not $problem)
		{
			# The CIB was read, is this node ready?
			my $node_name = $anvil->data->{cib}{parsed}{'local'}{name};
			my $ready     = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ready => $ready }});
			if ($ready)
			{
				# We're good, this will be the last pass.
				$waiting = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
				$message  = "job_0279";
				$progress = 15;
			}
			else
			{
				# Node isn't ready yet.
				$message  = "job_0278";
				$progress = 10;
			}
		}
		
		# Report whichever state we found.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $message});
		$anvil->Job->update_progress({progress => $progress, message => $message});
		
		sleep 10 if $waiting;
	}
	
	return(0);
}
|
|
|
# This boots a single server by calling Cluster->boot_server(). 
# 
# Parameters (positional);
# - $anvil    - The Anvil::Tools handle.
# - $server   - The name of the server to boot.
# - $wait     - Passed to Cluster->boot_server() as 'wait'. It also selects whether 'job_0280' (set) or 
#               'job_0287' (not set) is reported on success.
# - $progress - The job progress to report for all non-fatal updates.
# 
# Returns '0' if the server was already running or if the boot call reported no problem. If the definition
# file is missing, the server isn't in the parsed CIB, the server is in a state other than 'off' or 
# 'running', or the boot call reports a problem, the job is set to 100% with an error message and 
# nice_exit() is called with exit code '1'.
sub boot_server
{
	my ($anvil, $server, $wait, $progress) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		server   => $server, 
		'wait'   => $wait, 
		progress => $progress, 
	}});
	
	# The server's definition XML needs to be on disk, or we can't boot it.
	my $definition_file = join("/", $anvil->data->{path}{directories}{shared}{definitions}, $server.".xml");
	unless (-e $definition_file)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0259", variables => { definition_file => $definition_file }});
		$anvil->Job->update_progress({progress => 100, message => "error_0259,!!definition_file!".$definition_file."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Having the definition isn't enough, the server also has to be in the cluster.
	if (not exists $anvil->data->{cib}{parsed}{data}{server}{$server})
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0261", variables => { 
			server          => $server, 
			definition_file => $definition_file, 
		}});
		$anvil->Job->update_progress({progress => 100, message => "error_0261,!!definition_file!".$definition_file."!!,!!server!".$server."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Only a server that is 'off' gets booted.
	my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { status => $status }});
	if ($status eq "running")
	{
		# It's already running, so there is nothing to do.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0548", variables => { server => $server }});
		$anvil->Job->update_progress({progress => $progress, message => "log_0548,!!server!".$server."!!"});
		return(0);
	}
	elsif ($status ne "off")
	{
		# It's in some other state, so we can't boot it.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0262", variables => { 
			server => $server,
			status => $status, 
		}});
		$anvil->Job->update_progress({progress => 100, message => "error_0262,!!status!".$status."!!,!!server!".$server."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	# Ask the cluster to boot the server.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0288", variables => { server => $server }});
	$anvil->Job->update_progress({progress => $progress, message => "job_0288,!!server!".$server."!!"});
	my $failed = $anvil->Cluster->boot_server({
		debug  => 2, 
		server => $server, 
		'wait' => $wait, 
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $failed }});
	if ($failed)
	{
		# Failed, abort.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0267", variables => { server => $server }});
		$anvil->Job->update_progress({progress => 100, message => "error_0267,!!server!".$server."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	else
	{
		# If we waited, the server has booted. If not, the boot has only been requested.
		my $result_key = $wait ? "job_0280" : "job_0287";
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => $result_key, variables => { server => $server }});
		$anvil->Job->update_progress({progress => $progress, message => $result_key.",!!server!".$server."!!"});
	}
	
	return(0);
}
|
|
|
# This boots every server found in the parsed CIB, honouring each server's "boot after" server and start 
# delay as recorded in the database.
# 
# The servers are walked, in name order, over and over until all are processed. A server is "processed" 
# when it has been booted, when it was found to not be 'off', or when it is configured to stay off (its 
# boot-after UUID is all zeros). The time a server was processed is stored in 
# 'boot_server::<server_name>::processed', and a dependent server only boots once more than its start delay
# has passed since then. Between passes, we sleep for two seconds and re-parse the CIB.
# 
# A boot-after server that is not in the parsed CIB would never be marked as processed, so such a 
# dependency is ignored instead of being waited on forever.
# 
# Takes the Anvil::Tools handle as its only argument and returns '0'. Note that boot_server() calls 
# nice_exit() itself if a server fails to boot.
# 
# NOTE(review): Two servers set to boot after each other will still wait on each other forever. Also, the
#               result of the CIB re-parse between passes is only logged, not acted on - TODO confirm if 
#               either needs handling.
sub boot_all_servers
{
	my ($anvil) = @_;
	
	### TODO: Manage the boot order here.
	my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_count => $server_count }});
	if (not $server_count)
	{
		# No servers exist yet.
		return(0);
	}
	
	# Load information about the servers on this Anvil!.
	my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	
	# Progress starts at 15% and each server we boot adds an equal share of 70%, so we top out at 85%.
	my $increment = int(70 / $server_count);
	my $percent   = 15;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { increment => $increment }});
	
	# Loop until all are processed.
	my $waiting = 1;
	while($waiting)
	{
		# Get a list of servers now.
		$anvil->Database->get_servers({debug => 3});
		
		# This will get set to 0 if any servers are waiting to boot.
		my $all_processed = 1;
		foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
		{
			my $status      = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{status};
			   $status      = "" if not defined $status;
			my $host_name   = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{host_name};
			my $role        = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{role};
			my $active      = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{active};
			
			# A server in the cluster might not be in the database. In that case, there's no UUID
			# to look up a delay or a boot-after server with, so we treat it as having neither.
			my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid};
			   $server_uuid = "" if not defined $server_uuid;
			my $boot_delay  = $server_uuid ? $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_delay} : 0;
			   $boot_delay  = 0 if not $boot_delay;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:server_name' => $server_name,
				's2:status'      => $status,
				's3:host_name'   => $host_name,
				's4:role'        => $role,
				's5:active'      => $active, 
				's6:server_uuid' => $server_uuid, 
				's7:boot_delay'  => $boot_delay, 
			}});
			
			if (not exists $anvil->data->{boot_server}{$server_name}{processed})
			{
				# This will get set to the boot time once we actually start it. This will let
				# us time when servers that boot after this server can boot.
				$anvil->data->{boot_server}{$server_name}{processed} = 0;
			}
			elsif ($anvil->data->{boot_server}{$server_name}{processed})
			{
				# Already processed.
				next;
			}
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0719", variables => { server => $server_name }});
			
			my $boot_after_server_uuid = $server_uuid ? $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_after_server_uuid} : "";
			   $boot_after_server_uuid = "" if not defined $boot_after_server_uuid;
			   $boot_after_server_uuid = "" if $boot_after_server_uuid eq "NULL";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { boot_after_server_uuid => $boot_after_server_uuid }});
			if ($boot_after_server_uuid)
			{
				if ($boot_after_server_uuid eq "00000000-0000-0000-0000-000000000000")
				{
					# This server is configured to stay off.
					$anvil->data->{boot_server}{$server_name}{processed} = time;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						"boot_server::${server_name}::processed" => $anvil->data->{boot_server}{$server_name}{processed},
					}});
					
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0723", variables => { server => $server_name }});
					next;
				}
				
				# What's the server's name.
				my $boot_after_server_name = $anvil->data->{servers}{server_uuid}{$boot_after_server_uuid}{server_name};
				   $boot_after_server_name = "" if not defined $boot_after_server_name;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { boot_after_server_name => $boot_after_server_name }});
				
				# Only servers in the CIB are walked by this loop, so a boot-after server that
				# isn't in the CIB can never be marked as processed. Waiting on it would loop
				# forever, so the dependency is dropped.
				if (($boot_after_server_name) && (not exists $anvil->data->{cib}{parsed}{data}{server}{$boot_after_server_name}))
				{
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						's1:server_name'                     => $server_name,
						's2:boot_after_server_not_in_the_cib' => $boot_after_server_name,
					}});
					$boot_after_server_name = "";
				}
				
				# Has this server processed?
				if ($boot_after_server_name)
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0717", variables => { 
						delay  => $boot_delay,
						server => $boot_after_server_name, 
					}});
					if (not exists $anvil->data->{boot_server}{$boot_after_server_name})
					{
						# We haven't reached the other server in the walk yet.
						$anvil->data->{boot_server}{$boot_after_server_name}{processed} = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							"boot_server::${boot_after_server_name}::processed" => $anvil->data->{boot_server}{$boot_after_server_name}{processed},
						}});
					}
					
					if ($anvil->data->{boot_server}{$boot_after_server_name}{processed})
					{
						my $processed_seconds_ago = time - $anvil->data->{boot_server}{$boot_after_server_name}{processed};
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { processed_seconds_ago => $processed_seconds_ago }});
						if ($processed_seconds_ago > $boot_delay)
						{
							# Ready to boot.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0721", variables => { server => $server_name }});
						}
						else
						{
							# Not ready yet.
							$all_processed = 0;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_processed => $all_processed }});
							
							my $time_to_wait = $boot_delay - $processed_seconds_ago;
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0720", variables => { 
								boot_after_server => $boot_after_server_name, 
								this_server       => $server_name, 
								time_to_wait      => $time_to_wait,
							}});
							next;
						}
					}
					else
					{
						# The other server hasn't processed yet.
						$all_processed = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_processed => $all_processed }});
						
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0718", variables => { 
							boot_after_server => $boot_after_server_name, 
							this_server       => $server_name, 
						}});
						next;
					}
				}
			}
			
			if ($status eq "off")
			{
				# Boot it. Unlike a single server boot, we only wait for the server to finish 
				# booting if '--wait' was used.
				my $wait    =  $anvil->data->{switches}{'wait'} ? 1 : 0;
				   $percent += $increment;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					'wait'  => $wait,
					percent => $percent,
				}});
				boot_server($anvil, $server_name, $wait, $percent);
				
				# If we're here, the server processed.
				$anvil->data->{boot_server}{$server_name}{processed} = time;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					"boot_server::${server_name}::processed" => $anvil->data->{boot_server}{$server_name}{processed},
				}});
			}
			elsif (not $anvil->data->{boot_server}{$server_name}{processed})
			{
				# It may have booted before we ran.
				$anvil->data->{boot_server}{$server_name}{processed} = time;
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0722", variables => { server => $server_name }});
			}
		}
		
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_processed => $all_processed }});
		if ($all_processed)
		{
			# We're done!
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
		else
		{
			# Wait a bit, then refresh our view of the cluster for the next pass.
			sleep 2;
			my $problem = $anvil->Cluster->parse_cib({debug => 3});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		}
	}
	
	return(0);
}
|
|
|