#!/usr/bin/perl
# 
# This program boots a server. It can be called as either a job from the webui or directly from another
# program or a terminal.
# 
# Switches (as passed to Get->switches below);
#   --job-uuid      Run as a job; the 'server=' / 'server-uuid=' lines in the job data override the switches.
#   --server        Name of the server to boot, or 'all' to boot every server in the cluster.
#   --server-uuid   UUID of the server to boot; it is translated to the server name via the database.
#   --no-wait       When booting a single server, don't wait for the boot to complete.
#   --wait          When booting 'all' servers, wait for each boot to complete.
# 
# Exit codes;
# 0 = Normal exit.
# 1 = Any failure; no database connection, missing or invalid server, wrong host type, not in an Anvil!
#     system, or the boot itself failed. The specific reason is logged and recorded in the job progress.
# 
# TODO: 
#  - Add support for boot ordering.
#  - Check which node we want to put on and set a location constraint to prefer that node before calling pcs.
# 

use strict;
use warnings;
use Anvil::Tools;

my $THIS_FILE         = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete.
$| = 1;

my $anvil = Anvil::Tools->new();

$anvil->Get->switches({list => ["job-uuid", "no-wait", "server", "server-uuid", "wait"], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

$anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections})
{
	# No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try 
	# again after we exit.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0075"});
	sleep 10;
	$anvil->nice_exit({exit_code => 1});
}

if ($anvil->data->{switches}{'job-uuid'})
{
	# Load the job data.
	$anvil->Job->clear();
	$anvil->Job->get_job_details();
	$anvil->Job->update_progress({
		progress         => 1,
		job_picked_up_by => $$, 
		job_picked_up_at => time, 
		message          => "job_0282", 
	});
	
	# Pull out the job data. Values found here replace anything given on the command line.
	foreach my $line (split/\n/, $anvil->data->{jobs}{job_data})
	{
		if ($line =~ /server=(.*?)$/)
		{
			$anvil->data->{switches}{'server'} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				'switches::server' => $anvil->data->{switches}{'server'},
			}});
		}
		if ($line =~ /server-uuid=(.*?)$/)
		{
			$anvil->data->{switches}{'server-uuid'} = $1;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
			}});
		}
	}
}

# Now check that we have a server. If it's a server_uuid, read the server name.
if ($anvil->data->{switches}{'server-uuid'})
{
	# Convert the server_uuid to a server_name.
	my $query = "SELECT server_name FROM servers WHERE server_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'server-uuid'}).";";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }});
	
	my $server_name = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0];
	   $server_name = "" if not defined $server_name;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_name => $server_name }});
	
	if ($server_name)
	{
		$anvil->data->{switches}{'server'} = $server_name;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			'switches::server' => $anvil->data->{switches}{'server'},
		}});
	}
	else
	{
		# Invalid server UUID.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0269", variables => { 
			server_uuid => $anvil->data->{switches}{'server-uuid'},
		}});
		$anvil->Job->update_progress({progress => 100, message => "error_0269,!!server_uuid!".$anvil->data->{switches}{'server-uuid'}."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
}

# Do we have a server name?
if (not $anvil->data->{switches}{'server'})
{
	# Unable to proceed.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0257"});
	$anvil->Job->update_progress({progress => 100, message => "error_0257"});
	$anvil->nice_exit({exit_code => 1});
}

# Are we a node or DR host?
$anvil->data->{sys}{host_type} = $anvil->Get->host_type();
if (($anvil->data->{sys}{host_type} ne "node") && ($anvil->data->{sys}{host_type} ne "dr"))
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0258"});
	$anvil->Job->update_progress({progress => 100, message => "error_0258"});
	$anvil->nice_exit({exit_code => 1});
}

### TODO: Add DR support. For now, this only works on Nodes in a cluster
if ($anvil->data->{sys}{host_type} eq "dr")
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0265"});
	$anvil->Job->update_progress({progress => 100, message => "error_0265"});
	$anvil->nice_exit({exit_code => 1});
}

# Make sure that we're in an Anvil! system.
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
if (not $anvil->data->{sys}{anvil_uuid})
{
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
	$anvil->Job->update_progress({progress => 100, message => "error_0260"});
	$anvil->nice_exit({exit_code => 1});
}

# Wait for pacemaker to be up.
wait_for_pacemaker($anvil);

# If 'server' is 'all', boot all servers.
if (lc($anvil->data->{switches}{'server'}) eq "all")
{
	boot_all_servers($anvil);
}
else
{
	# A single server waits for the boot to finish unless '--no-wait' was given.
	my $wait = $anvil->data->{switches}{'no-wait'} ? 0 : 1;
	boot_server($anvil, $anvil->data->{switches}{'server'}, $wait, 50);
}

$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});
$anvil->Job->update_progress({progress => 100, message => "job_0281"});

$anvil->nice_exit({exit_code => 0});


#############################################################################################################
# Functions                                                                                                 #
#############################################################################################################

# Blocks until the cluster's CIB can be parsed and the local node reports itself as ready.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle.
# 
# The CIB is re-parsed every ten seconds. Job progress is set to 5 while the CIB can't be parsed, 10 while
# the local node is not yet ready and 15 once it is. There is no timeout; this loops until the node is
# ready. Always returns 0.
sub wait_for_pacemaker
{
	my ($anvil) = @_;
	
	# Boot the server using pcs, but of course, wait for the node to be up.
	my $waiting = 1;
	while($waiting)
	{
		my $problem = $anvil->Cluster->parse_cib({debug => 3});
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		if (not $problem)
		{
			my $node_name = $anvil->data->{cib}{parsed}{'local'}{name};
			my $ready     = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready};
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ready => $ready }});
			if ($ready)
			{
				# We're good.
				$waiting = 0;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0279"});
				$anvil->Job->update_progress({progress => 15, message => "job_0279"});
			}
			else
			{
				# Node isn't ready yet.
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0278"});
				$anvil->Job->update_progress({progress => 10, message => "job_0278"});
			}
		}
		else
		{
			# Cluster hasn't started.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"});
			$anvil->Job->update_progress({progress => 5, message => "job_0277"});
		}
		if ($waiting)
		{
			sleep 10;
		}
	}
	
	return(0);
}

# Boots a single server through the cluster.
# 
# Parameters;
#   $anvil    - The Anvil::Tools handle.
#   $server   - Name of the server to boot.
#   $wait     - Passed through to Cluster->boot_server as 'wait'; when true the "booted" message is
#               reported on success, otherwise the "boot requested" message is.
#   $progress - Job progress value to record for the non-fatal status updates made here.
# 
# Returns 0 if the server was already running, or once the boot succeeded / was requested. The program
# exits (code 1, job progress 100) if the definition file is missing, the server isn't in the parsed CIB,
# the server is in a state other than 'off' or 'running', or the boot call reports a problem.
# 
# This reads the CIB data already parsed into $anvil->data->{cib}{parsed}; it does not re-parse it.
sub boot_server
{
	my ($anvil, $server, $wait, $progress) = @_;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		server   => $server,
		'wait'   => $wait, 
		progress => $progress, 
	}});
	
	# Verify that the server's XML file exists.
	my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml";
	if (not -e $definition_file)
	{
		# No XML, no boot
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0259", variables => { definition_file => $definition_file }});
		$anvil->Job->update_progress({progress => 100, message => "error_0259,!!definition_file!".$definition_file."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	if (not exists $anvil->data->{cib}{parsed}{data}{server}{$server})
	{
		# XML exists, but it's not in the cluster.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0261", variables => { 
			server          => $server,
			definition_file => $definition_file, 
		}});
		$anvil->Job->update_progress({progress => 100, message => "error_0261,!!definition_file!".$definition_file."!!,!!server!".$server."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	
	my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { status => $status }});
	
	if ($status ne "off")
	{
		if ($status eq "running")
		{
			# Already running, there is nothing to boot. This is not an error.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0548", variables => { server => $server }});
			$anvil->Job->update_progress({progress => $progress, message => "log_0548,!!server!".$server."!!"});
			return(0);
		}
		else
		{
			# Some other state; it's not off, so we can't boot it.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0262", variables => { 
				server => $server,
				status => $status, 
			}});
			$anvil->Job->update_progress({progress => 100, message => "error_0262,!!status!".$status."!!,!!server!".$server."!!"});
			$anvil->nice_exit({exit_code => 1});
		}
	}
	
	# Now boot.
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0288", variables => { server => $server }});
	$anvil->Job->update_progress({progress => $progress, message => "job_0288,!!server!".$server."!!"});
	my $problem = $anvil->Cluster->boot_server({
		debug  => 2, 
		server => $server, 
		'wait' => $wait,
	});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	if ($problem)
	{
		# Failed, abort.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0267", variables => { server => $server }});
		$anvil->Job->update_progress({progress => 100, message => "error_0267,!!server!".$server."!!"});
		$anvil->nice_exit({exit_code => 1});
	}
	else
	{
		if ($wait)
		{
			# Booted!
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0280", variables => { server => $server }});
			$anvil->Job->update_progress({progress => $progress, message => "job_0280,!!server!".$server."!!"});
		}
		else
		{
			# Boot requested
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0287", variables => { server => $server }});
			$anvil->Job->update_progress({progress => $progress, message => "job_0287,!!server!".$server."!!"});
		}
	}
	
	return(0);
}

# Boots every server found in the parsed CIB, honouring each server's "boot after" server and start delay
# as read from the database.
# 
# Parameters;
#   $anvil - The Anvil::Tools handle. 'sys::anvil_uuid' must already be set.
# 
# Per-server state is tracked in $anvil->data->{boot_server}{<name>}{processed}; 0 means not yet handled,
# otherwise it holds the unix time at which the server was booted, found already not 'off', or skipped 
# because it is configured to stay off. A server with a "boot after" server is held back until that server
# has been processed and its start delay (in seconds) has elapsed.
# 
# Progress starts at 15 and is raised by int(70 / server_count) for each server booted, so it reaches at
# most 85 here. Servers are only waited on if '--wait' was given. Always returns 0; a failed boot exits
# the program from inside boot_server().
sub boot_all_servers
{
	my ($anvil) = @_;
	
	### TODO: Manage the boot order here.
	my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_count => $server_count }});
	
	if (not $server_count)
	{
		# No servers exist yet.
		return(0);
	}
	
	# Load information about the servers on this Anvil!.
	my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { anvil_uuid => $anvil_uuid }});
	
	my $increment = int(70 / $server_count);
	my $percent   = 15;
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { increment => $increment }});
	
	# Loop until all are processed.
	# NOTE(review): There is no timeout. If a server is set to boot after a server that is not in the 
	#               CIB, or two servers are set to boot after each other, the dependency is never marked
	#               processed and this loop will not end. Confirm how that should be handled.
	my $waiting = 1;
	while($waiting)
	{
		# Get a list of servers now.
		$anvil->Database->get_servers({debug => 3});
		
		# This will get set to 0 if any servers are waiting to boot.
		my $all_processed = 1;
		foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}})
		{
			my $status      = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{status};
			my $host_name   = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{host_name};
			my $role        = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{role};
			my $active      = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{active};
			my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid};
			# A server that is in the cluster but has no database record has no UUID. Use an empty
			# string so the hash lookups below don't use an undefined key; such a server simply has no
			# delay and no "boot after" server.
			   $server_uuid = "" if not defined $server_uuid;
			my $boot_delay  = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_delay};
			   $boot_delay  = 0 if not $boot_delay;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				's1:server_name' => $server_name,
				's2:status'      => $status,
				's3:host_name'   => $host_name,
				's4:role'        => $role,
				's5:active'      => $active, 
				's6:server_uuid' => $server_uuid, 
				's7:boot_delay'  => $boot_delay, 
			}});
			
			if (not exists $anvil->data->{boot_server}{$server_name}{processed})
			{
				# This will get set to the boot time once we actually start it. This will let 
				# us time when servers that boot after this server can boot.
				$anvil->data->{boot_server}{$server_name}{processed} = 0;
			}
			elsif ($anvil->data->{boot_server}{$server_name}{processed})
			{
				# Already processed.
				next;
			}
			
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0719", variables => { server => $server_name }});
			
			my $boot_after_server_uuid = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_after_server_uuid};
			   $boot_after_server_uuid = "" if not defined $boot_after_server_uuid;
			   $boot_after_server_uuid = "" if $boot_after_server_uuid eq "NULL";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { boot_after_server_uuid => $boot_after_server_uuid }});
			if ($boot_after_server_uuid)
			{
				if ($boot_after_server_uuid eq "00000000-0000-0000-0000-000000000000")
				{
					# This server is configured to stay off. Mark it processed so that the loop
					# can finish.
					$anvil->data->{boot_server}{$server_name}{processed} = time;
					$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
						"boot_server::${server_name}::processed" => $anvil->data->{boot_server}{$server_name}{processed},
					}});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0723", variables => { server => $server_name }});
					next;
				}
				
				# What's the server's name. If the UUID isn't known, the dependency is ignored.
				my $boot_after_server_name = $anvil->data->{servers}{server_uuid}{$boot_after_server_uuid}{server_name};
				   $boot_after_server_name = "" if not defined $boot_after_server_name;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { boot_after_server_name => $boot_after_server_name }});
				
				# Has this server processed?
				if ($boot_after_server_name)
				{
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0717", variables => { 
						delay  => $boot_delay,
						server => $boot_after_server_name, 
					}});
					if (not exists $anvil->data->{boot_server}{$boot_after_server_name})
					{
						$anvil->data->{boot_server}{$boot_after_server_name}{processed} = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
							"boot_server::${boot_after_server_name}::processed" => $anvil->data->{boot_server}{$boot_after_server_name}{processed},
						}});
					}
					if ($anvil->data->{boot_server}{$boot_after_server_name}{processed})
					{
						my $processed_seconds_ago = time - $anvil->data->{boot_server}{$boot_after_server_name}{processed};
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { processed_seconds_ago => $processed_seconds_ago }});
						
						# The delay is satisfied once the full number of seconds has passed,
						# so a delay of 0 lets this server boot in the same pass.
						if ($processed_seconds_ago >= $boot_delay)
						{
							# Ready to boot.
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0721", variables => { server => $server_name }});
						}
						else
						{
							# Not ready yet.
							$all_processed = 0;
							$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_processed => $all_processed }});
							
							my $time_to_wait = $boot_delay - $processed_seconds_ago;
							$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0720", variables => { 
								boot_after_server => $boot_after_server_name,
								this_server       => $server_name, 
								time_to_wait      => $time_to_wait, 
							}});
							next;
						}
					}
					else
					{
						# The other server hasn't processed yet.
						$all_processed = 0;
						$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_processed => $all_processed }});
						$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0718", variables => { 
							boot_after_server => $boot_after_server_name,
							this_server       => $server_name, 
						}});
						next;
					}
				}
			}
			
			if ($status eq "off")
			{
				# Boot it. Unlike a single-server boot, we only wait if '--wait' was given.
				my $wait    =  $anvil->data->{switches}{'wait'} ? 1 : 0;
				   $percent += $increment;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					'wait'  => $wait,
					percent => $percent, 
				}});
				boot_server($anvil, $server_name, $wait, $percent);
				
				# If we're here, the server processed.
				$anvil->data->{boot_server}{$server_name}{processed} = time;
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
					"boot_server::${server_name}::processed" => $anvil->data->{boot_server}{$server_name}{processed},
				}});
			}
			elsif (not $anvil->data->{boot_server}{$server_name}{processed})
			{
				# It may have booted before we ran.
				$anvil->data->{boot_server}{$server_name}{processed} = time;
				$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0722", variables => { server => $server_name }});
			}
		}
		
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { all_processed => $all_processed }});
		if ($all_processed)
		{
			# We're done!
			$waiting = 0;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
		}
		else
		{
			# Wait a bit, then refresh the CIB so the next pass sees current server states.
			sleep 2;
			my $problem = $anvil->Cluster->parse_cib({debug => 3});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
		}
	}
	
	return(0);
}