* Created Cluster->add_server() which, well, adds a server to a pacemaker cluster, including sorting out location constraints to favour the node the server is running on, if it's running.

* Removed the exit-if-no-DB check in ocf:alteeve:server so that (hopefully; this needs testing) running servers won't be impacted if the nodes lose contact with both/all strikers.
* Updated scan-server to make an explicit check for missing XML definition files on startup and write them if needed.
* Started the very early work on anvil-delete-server.
* Updated anvil-provision-server to wait when it's running in peer mode until the new XML definition is in the DB and then write it out to disk before exiting. Also updated it to add the new server to pacemaker before exiting.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 4 years ago
parent e0ceb5c65f
commit 05b1fccdb3
  1. 156
      Anvil/Tools/Cluster.pm
  2. 4
      Anvil/Tools/Database.pm
  3. 2
      Anvil/Tools/Remote.pm
  4. 6
      notes
  5. 33
      ocf/alteeve/server
  6. 53
      scancore-agents/scan-server/scan-server
  7. 6
      scancore-agents/scan-server/scan-server.xml
  8. 13
      share/words.xml
  9. 60
      tools/anvil-delete-server
  10. 116
      tools/anvil-provision-server
  11. 4
      tools/test.pl

@ -14,6 +14,7 @@ our $VERSION = "3.0.0";
my $THIS_FILE = "Cluster.pm"; my $THIS_FILE = "Cluster.pm";
### Methods; ### Methods;
# add_server
# assemble_storage_groups # assemble_storage_groups
# boot_server # boot_server
# check_node_status # check_node_status
@ -87,6 +88,161 @@ sub parent
# Public methods # # Public methods #
############################################################################################################# #############################################################################################################
=head2 add_server

This takes a server name, finds where it is running and then adds it to pacemaker. On success, C<< 0 >> is returned. If there is a problem, C<< !!error!! >> is returned.

The server is added as an C<< ocf:alteeve:server >> resource. Its C<< target-role >> is set to C<< started >> if the server is found to be running, and to C<< stopped >> otherwise. A location constraint is then created that prefers the peer node if the server is running on a host other than this one, and prefers the local node in all other cases.

An error is returned, and nothing is changed, if the CIB can't be parsed, if the server is already in the cluster, or if the local node is not ready. An error is also returned if the server is not found in the CIB after the commands to add it have run.

Parameters;

=head3 server_name (required)

This is the name of the server being added.

=cut
sub add_server
{
	my $self      = shift;
	my $parameter = shift;
	my $anvil     = $self->parent;
	my $debug     = defined $parameter->{debug} ? $parameter->{debug} : 2;
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Cluster->add_server()" }});
	
	my $server_name = defined $parameter->{server_name} ? $parameter->{server_name} : "";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		server_name => $server_name,
	}});
	
	if (not $server_name)
	{
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Cluster->add_server()", parameter => "server_name" }});
		return("!!error!!");
	}
	
	# Are we in the cluster?
	my ($problem) = $anvil->Cluster->parse_cib({debug => $debug});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
	if ($problem)
	{
		# The cluster isn't running, unable to add the server.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0211", variables => { server_name => $server_name }});
		return("!!error!!");
	}
	
	# Does the server already exist?
	if (exists $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server_name}{type})
	{
		# The server already exists
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0213", variables => { server_name => $server_name }});
		return("!!error!!");
	}
	
	my $local_ready = $anvil->data->{cib}{parsed}{'local'}{ready};
	my $local_name  = $anvil->data->{cib}{parsed}{'local'}{name};
	my $peer_name   = $anvil->data->{cib}{parsed}{peer}{name};
	my $peer_ready  = $anvil->data->{cib}{parsed}{peer}{ready};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		local_name  => $local_name, 
		local_ready => $local_ready, 
		peer_name   => $peer_name, 
		peer_ready  => $peer_ready, 
	}});
	
	if (not $local_ready)
	{
		# Can't add it
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0212", variables => { server_name => $server_name }});
		return("!!error!!");
	}
	
	# Find where the server is running. First, who is and where is my peer?
	$anvil->Database->get_anvils({debug => $debug});
	my $anvil_uuid      = $anvil->Cluster->get_anvil_uuid({debug => $debug});
	my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid};
	my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid};
	my $peer_host_uuid  = $anvil->Get->host_uuid() eq $node1_host_uuid ? $node2_host_uuid : $node1_host_uuid;
	my $peer_target_ip  = $anvil->Network->find_target_ip({host_uuid => $peer_host_uuid});
	my $password        = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		anvil_uuid      => $anvil_uuid, 
		node1_host_uuid => $node1_host_uuid, 
		node2_host_uuid => $node2_host_uuid, 
		peer_host_uuid  => $peer_host_uuid, 
		peer_target_ip  => $peer_target_ip, 
		password        => $anvil->Log->is_secure($password), 
	}});
	
	# Look for the server locally first, then on the peer. The second call passes 'refresh => 0'; 
	# presumably this keeps the results of the local scan instead of clearing them - confirm against 
	# Server->find().
	$anvil->Server->find({
		debug  => $debug,
		server => $server_name, 
	});
	$anvil->Server->find({
		debug    => $debug,
		refresh  => 0,
		password => $password, 
		target   => $peer_target_ip, 
		server   => $server_name, 
	});
	
	# The host here is the full host name. If the server wasn't found on either node, the status and host
	# will not be set, so we default them to empty strings to avoid comparing undefined values below. An
	# unfound server is then treated as not running, and this node is made the preferred host.
	my $host_name    = $anvil->Get->host_name();
	my $server_state = defined $anvil->data->{server}{location}{$server_name}{status} ? $anvil->data->{server}{location}{$server_name}{status} : "";
	my $server_host  = defined $anvil->data->{server}{location}{$server_name}{host}   ? $anvil->data->{server}{location}{$server_name}{host}   : "";
	my $target_role  = $server_state eq "running" ? "started" : "stopped";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		host_name    => $host_name, 
		server_state => $server_state, 
		server_host  => $server_host, 
		target_role  => $target_role, 
	}});
	
	### TODO: If the target_role is 'started' because the server was running, we may need to later do an 
	###       update to set it to 'stopped' after we've verified it's in the cluster below.
	# NOTE: The return code of the 'pcs' calls below is logged but not acted on. Success is determined by
	#       re-reading the CIB at the end and checking that the server now exists.
	my $resource_command = $anvil->data->{path}{exe}{pcs}." resource create ".$server_name." ocf:alteeve:server name=\"".$server_name."\" meta allow-migrate=\"true\" target-role=\"".$target_role."\" op monitor interval=\"60\" start timeout=\"INFINITY\" on-fail=\"block\" stop timeout=\"INFINITY\" on-fail=\"block\" migrate_to timeout=\"INFINITY\"";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource_command => $resource_command }});
	
	my ($output, $return_code) = $anvil->System->call({shell_call => $resource_command});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		output      => $output, 
		return_code => $return_code, 
	}});
	
	# The node given the higher score (200) is the preferred host for the server.
	my $constraint_command = $anvil->data->{path}{exe}{pcs}." constraint location ".$server_name." prefers ";
	if (($server_state eq "running") && ($server_host ne $host_name))
	{
		# The server is running somewhere other than here, so set the peer as primary.
		$constraint_command .= $local_name."=100 ".$peer_name."=200";
	}
	else
	{
		# Set us as primary.
		$constraint_command .= $local_name."=200 ".$peer_name."=100";
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { constraint_command => $constraint_command }});
	
	undef $output;
	undef $return_code;
	($output, $return_code) = $anvil->System->call({shell_call => $constraint_command});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { 
		output      => $output, 
		return_code => $return_code, 
	}});
	
	# Reload the CIB
	($problem) = $anvil->Cluster->parse_cib({debug => $debug});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { problem => $problem }});
	
	# Was the server actually added?
	if (not exists $anvil->data->{cib}{parsed}{cib}{resources}{primitive}{$server_name}{type})
	{
		# The server wasn't added
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0214", variables => { server_name => $server_name }});
		return("!!error!!");
	}
	
	return(0);
}

=head2 assemble_storage_groups =head2 assemble_storage_groups

@ -3852,7 +3852,7 @@ This loads all known servers from the database.
servers::server_uuid::<server_uuid>::server_start_after_server_uuid servers::server_uuid::<server_uuid>::server_start_after_server_uuid
servers::server_uuid::<server_uuid>::server_start_delay servers::server_uuid::<server_uuid>::server_start_delay
servers::server_uuid::<server_uuid>::server_host_uuid servers::server_uuid::<server_uuid>::server_host_uuid
servers::server_uuid::<server_uuid>::server_state servers::server_uuid::<server_uuid>::server_state NOTE: This is set to 'DELETED' for deleted servers
servers::server_uuid::<server_uuid>::server_live_migration servers::server_uuid::<server_uuid>::server_live_migration
servers::server_uuid::<server_uuid>::server_pre_migration_file_uuid servers::server_uuid::<server_uuid>::server_pre_migration_file_uuid
servers::server_uuid::<server_uuid>::server_pre_migration_arguments servers::server_uuid::<server_uuid>::server_pre_migration_arguments
@ -15532,6 +15532,8 @@ ORDER BY
}}); }});
if ($anvil->data->{sys}{database}{table}{$table}{last_updated} > $anvil->data->{sys}{database}{table}{$table}{uuid}{$uuid}{last_updated}) if ($anvil->data->{sys}{database}{table}{$table}{last_updated} > $anvil->data->{sys}{database}{table}{$table}{uuid}{$uuid}{last_updated})
{ {
### TODO: This triggers with extremely high numbers. Somewhere, the time
### isn't being updated properly.
# Resync needed. # Resync needed.
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"sys::database::table::${table}::last_updated" => $anvil->data->{sys}{database}{table}{$table}{last_updated}, "sys::database::table::${table}::last_updated" => $anvil->data->{sys}{database}{table}{$table}{last_updated},

@ -599,7 +599,9 @@ sub call
's2:ssh_fh->error' => $ssh_fh->error, 's2:ssh_fh->error' => $ssh_fh->error,
}}); }});
my $variables = { my $variables = {
remote_user => $remote_user,
target => $target.":".$port, target => $target.":".$port,
user => getpwuid($<),
error => $ssh_fh->error, error => $ssh_fh->error,
connection => $ssh_fh_key, connection => $ssh_fh_key,
file => $bad_file, file => $bad_file,

@ -287,6 +287,12 @@ pcs resource update srv07-el6 ocf:alteeve:server name="srv07-el6" meta allow-mig
stonith_admin --fence el8-a01n02 --verbose; crm_error $? stonith_admin --fence el8-a01n02 --verbose; crm_error $?
pcs resource create srv01-test ocf:alteeve:server name="srv01-test" meta allow-migrate="true" target-role="started" op monitor interval="60" start timeout="INFINITY" on-fail="block" stop timeout="INFINITY" on-fail="block" migrate_to timeout="INFINITY"
pcs constraint location srv01-test prefers el8-a01n01=200 el8-a01n02=100
stonith-max-attempts=INFINITY stonith-max-attempts=INFINITY
cluster-recheck-interval puts an upper bound on the "i give up" time cluster-recheck-interval puts an upper bound on the "i give up" time

@ -96,17 +96,14 @@ $| = 1;
# NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main lopp. Use the Log methods # NOTE: Setting 'log_level' and 'log_secure' here will get overridden in the main lopp. Use the Log methods
# in the loop as well to override defaults in code. # in the loop as well to override defaults in code.
my $anvil = Anvil::Tools->new(); my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
# If we can connect to a database, we'll set/clear the 'migrating' flag during migrations # If we can connect to a database, we'll set/clear the 'migrating' flag during migrations
$anvil->Database->connect(); $anvil->Database->connect();
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"});
if (not $anvil->data->{sys}{database}{connections}) if (not $anvil->data->{sys}{database}{connections})
{ {
# No databases, exit. # No databases, we're only going to be able to do status checks..
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "warning_0073"});
return(1);
} }
### Read or Set the environment variables ### Read or Set the environment variables
@ -155,6 +152,9 @@ $anvil->data->{environment}{OCF_RESKEY_CRM_meta_stop_drbd_resources} = 0;
# Get any command line switches. # Get any command line switches.
$anvil->Get->switches({debug => 2}); $anvil->Get->switches({debug => 2});
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
if ($anvil->data->{switches}{stop_drbd_resources}) if ($anvil->data->{switches}{stop_drbd_resources})
{ {
$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = 1; $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = 1;
@ -664,7 +664,7 @@ sub start_server
# 7. Make sure all bridges exist and soft error if not. # 7. Make sure all bridges exist and soft error if not.
# 8. Start the server. # 8. Start the server.
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server = $anvil->data->{environment}{OCF_RESKEY_name};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0303", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0303", variables => { server => $server }});
# Make sure things are sane. # Make sure things are sane.
validate_all($anvil); validate_all($anvil);
@ -960,7 +960,7 @@ sub stop_server
# Stopping the server is simply a question of "is the server running?" and, if so, stop it. Once # Stopping the server is simply a question of "is the server running?" and, if so, stop it. Once
# stopped, we stop the DRBD resource on both nodes. # stopped, we stop the DRBD resource on both nodes.
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server = $anvil->data->{environment}{OCF_RESKEY_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0582", variables => { server => $server }});
# Read in an parse the server's XML. # Read in an parse the server's XML.
$anvil->System->check_storage({debug => 3}); $anvil->System->check_storage({debug => 3});
@ -992,11 +992,13 @@ sub server_status
{ {
my ($anvil) = @_; my ($anvil) = @_;
### NOTE: This method MUST always work, even without access to databases!
# If the named server is running, return OCF_SUCCESS (rc: 0), otherwise OCF_NOT_RUNNING (rc: 7). If # If the named server is running, return OCF_SUCCESS (rc: 0), otherwise OCF_NOT_RUNNING (rc: 7). If
# the server is failed, return OCF_ERR_GENERIC (1). # the server is failed, return OCF_ERR_GENERIC (1).
my $state = ""; my $state = "";
my $server = $anvil->data->{environment}{OCF_RESKEY_name}; my $server = $anvil->data->{environment}{OCF_RESKEY_name};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, key => "log_0521", variables => { server => $server }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0521", variables => { server => $server }});
if (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout}) if (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout})
{ {
@ -1186,6 +1188,14 @@ sub migrate_server
{ {
my ($anvil) = @_; my ($anvil) = @_;
# This requires a database
if (not $anvil->data->{sys}{database}{connections})
{
# No databases, exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"});
return(1);
}
### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a ### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a
### user might want to do this (ie: sync will be done soon and the need to evacuate the node ### user might want to do this (ie: sync will be done soon and the need to evacuate the node
### ASAP is high). Maybe we'll enforce this and require a '--force' switch later? ### ASAP is high). Maybe we'll enforce this and require a '--force' switch later?
@ -1391,8 +1401,8 @@ sub migrate_server
### NOTE: Pacemaker doesn't seem to ever pull servers. ### NOTE: Pacemaker doesn't seem to ever pull servers.
# Pull the server here. Start by verifying it's on the 'meta_on_node' host. # Pull the server here. Start by verifying it's on the 'meta_on_node' host.
# Scan locally and on our peer # Scan locally and on our peer
$anvil->Server->find({debug => 3}); $anvil->Server->find({debug => 2});
$anvil->Server->find({debug => 3, target => $meta_on_node, refresh => 0}); $anvil->Server->find({debug => 2, target => $meta_on_node, refresh => 0});
my $host = defined $anvil->data->{server}{location}{$server}{host} ? $anvil->data->{server}{location}{$server}{host} : ""; my $host = defined $anvil->data->{server}{location}{$server}{host} ? $anvil->data->{server}{location}{$server}{host} : "";
my $short_host = ($host =~ /^(.*?)\..*$/)[0]; my $short_host = ($host =~ /^(.*?)\..*$/)[0];
@ -1470,6 +1480,9 @@ sub validate_all
target => $target, target => $target,
}}); }});
# Log what we're doing.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0581", variables => { server => $server }});
# Read in and parse the server's XML. # Read in and parse the server's XML.
$anvil->System->check_storage({debug => 3}); $anvil->System->check_storage({debug => 3});
$anvil->Server->get_status({debug => 2, server => $server}); $anvil->Server->get_status({debug => 2, server => $server});

@ -121,6 +121,49 @@ sub collect_data
my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); my $anvil_uuid = $anvil->Cluster->get_anvil_uuid();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { anvil_uuid => $anvil_uuid }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { anvil_uuid => $anvil_uuid }});
# Look for any servers in the database but not yet written to disk.
if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid})
{
foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}})
{
my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid};
my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
's1:server_name' => $server_name,
's2:server_state' => $server_state,
's3:server_uuid' => $server_uuid,
}});
next if $server_state eq "DELETED";
# If the definition file doesn't exist at all, create the file.
my $xml_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { xml_file => $xml_file }});
if (not -f $xml_file)
{
my $server_definition = $anvil->data->{server_definitions}{server_definition_server_uuid}{$server_uuid}{server_definition_xml};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_definition => $server_definition }});
if ($server_definition)
{
# Register an alert.
my $variables = {
server => $server_name,
definition => $server_definition,
};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_server_alert_0016", variables => $variables});
$anvil->Alert->register({alert_level => "notice", message => "scan_server_alert_0016", variables => $variables, set_by => $THIS_FILE});
my $return = $anvil->Storage->write_file({
body => $server_definition,
file => $xml_file,
overwrite => 1,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'return' => $return }});
}
}
}
}
my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list --all", source => $THIS_FILE, line => __LINE__}); my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." list --all", source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code }});
foreach my $line (split/\n/, $output) foreach my $line (split/\n/, $output)
@ -320,9 +363,13 @@ DELETED - Marks a server as no longer existing
# Get the definitions # Get the definitions
my $virsh_definition = get_and_parse_virsh_definition($anvil, $server_name); my $virsh_definition = get_and_parse_virsh_definition($anvil, $server_name);
my $on_disk_definition = get_and_parse_disk_definition($anvil, $server_name);
my $database_definition = get_and_parse_database_definition($anvil, $server_name, $server_uuid); my $database_definition = get_and_parse_database_definition($anvil, $server_name, $server_uuid);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { on_disk_definition => $on_disk_definition }}); my $on_disk_definition = get_and_parse_disk_definition($anvil, $server_name);
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => {
virsh_definition => $virsh_definition,
database_definition => $database_definition,
on_disk_definition => $on_disk_definition,
}});
# If the 'server_updated_by_user' value is newer than the file age, and there is a difference in the definition, update the file. # If the 'server_updated_by_user' value is newer than the file age, and there is a difference in the definition, update the file.
my $current_time = time; my $current_time = time;
@ -773,7 +820,7 @@ sub get_and_parse_virsh_definition
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { server_name => $server_name }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { server_name => $server_name }});
my ($virsh_definition, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." dumpxml --inactive ".$server_name, source => $THIS_FILE, line => __LINE__}); my ($virsh_definition, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{virsh}." dumpxml --inactive ".$server_name, source => $THIS_FILE, line => __LINE__});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $virsh_definition, return_code => $return_code }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { output => $virsh_definition, return_code => $return_code }});
$anvil->Server->parse_definition({ $anvil->Server->parse_definition({
server => $server_name, server => $server_name,

@ -83,6 +83,12 @@ The amount of RAM configured for the server: [#!variable!server!#] has changed f
- NOTE: This RAM will not be used until the server is power cycled! - NOTE: This RAM will not be used until the server is power cycled!
</key> </key>
<key name="scan_server_alert_0016">
A server named: [#!variable!server!#] was found in the database, but there was no definition file on disk. Saving the definition file now. The definition XML is:
====
#!variable!definition!#
====
</key>
<!-- Log entries --> <!-- Log entries -->
<key name="scan_server_log_0001">Starting: [#!variable!program!#].</key> <key name="scan_server_log_0001">Starting: [#!variable!program!#].</key>

@ -293,6 +293,11 @@ Output (if any):
#!variable!shell_call!# #!variable!shell_call!#
====</key> ====</key>
<key name="error_0210">The call to create the new server appears to have failed. It hasn't shown up as running after 10 seconds. The status, if any, was last seen as: [#!variable!status!#].</key> <key name="error_0210">The call to create the new server appears to have failed. It hasn't shown up as running after 10 seconds. The status, if any, was last seen as: [#!variable!status!#].</key>
<key name="error_0211">Failed to add the server: [#!variable!server_name!#] because we failed to parse the CIB. Is the cluster running?</key>
<key name="error_0212">Failed to add the server: [#!variable!server_name!#] because we are not a full cluster member?</key>
<key name="error_0213">Failed to add the server: [#!variable!server_name!#] because it appears to already exist in the cluster.</key>
<key name="error_0214">Failed to add the server: [#!variable!server_name!#]. After the commands to add it ran, it was still not found in the cluster.</key>
<key name="error_0215">It looks like something went wrong while adding the server to the cluster. There should be more information in the logs.</key>
<!-- Files templates --> <!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which likes are translatable --> <!-- NOTE: Translating these files requires an understanding of which likes are translatable -->
@ -615,6 +620,9 @@ It should be provisioned in the next minute or two.</key>
<key name="job_0202">Done! The server should now be booting. Connect now and finish the OS install.</key> <key name="job_0202">Done! The server should now be booting. Connect now and finish the OS install.</key>
<key name="job_0203">The resource: [#!variable!resource!#] is now up.</key> <key name="job_0203">The resource: [#!variable!resource!#] is now up.</key>
<key name="job_0204">We're the peer for this new server, and so we're now done. The other node will complete the server's install momentarily.</key> <key name="job_0204">We're the peer for this new server, and so we're now done. The other node will complete the server's install momentarily.</key>
<key name="job_0205">As we're the peer, we're now going to wait for the new server definition to be added to the database, then write it out to disk.</key>
<key name="job_0206">The definition file: [#!variable!file!#] has been saved.</key>
<key name="job_0207">Preparing to add the server to the central cluster manager.</key>
<!-- Log entries --> <!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key> <key name="log_0001">Starting: [#!variable!program!#].</key>
@ -1289,13 +1297,15 @@ The file: [#!variable!file!#] needs to be updated. The difference is:
<key name="log_0578">#!free!#</key> <key name="log_0578">#!free!#</key>
<key name="log_0579">The LV(s) behind the resource: [#!variable!resource!#] have had their DRBD metadata created successfully.</key> <key name="log_0579">The LV(s) behind the resource: [#!variable!resource!#] have had their DRBD metadata created successfully.</key>
<key name="log_0580">The LV(s) behind the resource: [#!variable!resource!#] have been forced to primary to initialize the resource.</key> <key name="log_0580">The LV(s) behind the resource: [#!variable!resource!#] have been forced to primary to initialize the resource.</key>
<key name="log_0581">Asked to validate that the server: [#!variable!server!#] is able to run.</key>
<key name="log_0582">We've been asked to stop the server: [#!variable!server!#].</key>
<!-- Messages for users (less technical than log entries), though sometimes used for logs, too. --> <!-- Messages for users (less technical than log entries), though sometimes used for logs, too. -->
<key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key> <key name="message_0001">The host name: [#!variable!target!#] does not resolve to an IP address.</key>
<key name="message_0002">The connection to: [#!variable!connection!#] was refused. If you recently booted the target, the network might have started, the ssh daemon might not be running yet.</key> <key name="message_0002">The connection to: [#!variable!connection!#] was refused. If you recently booted the target, the network might have started, the ssh daemon might not be running yet.</key>
<key name="message_0003">There is no route to: [#!variable!target!#]. Is the machine (or the interface) up?</key> <key name="message_0003">There is no route to: [#!variable!target!#]. Is the machine (or the interface) up?</key>
<key name="message_0004">Timed out while waiting for a reply from: [#!variable!target!#]. Is the machine booting up? If so, please wait a minute or two and try again.</key> <key name="message_0004">Timed out while waiting for a reply from: [#!variable!target!#]. Is the machine booting up? If so, please wait a minute or two and try again.</key>
<key name="message_0005">There was an unknown error while connecting to: [#!variable!target!#]. The error was: [#!variable!error!#]</key> <key name="message_0005">There was an unknown error while connecting as: [#!variable!user!#] to: [#!variable!remote_user!#@#!variable!target!#]. The error was: [#!variable!error!#]</key>
<key name="message_0006">We were unable to log in to: [#!variable!connection!#]. Please check that the password is correct or that passwordless SSH is configured properly.</key> <key name="message_0006">We were unable to log in to: [#!variable!connection!#]. Please check that the password is correct or that passwordless SSH is configured properly.</key>
<key name="message_0007">An SSH session was successfully opened to: [#!variable!target!#].</key> <key name="message_0007">An SSH session was successfully opened to: [#!variable!target!#].</key>
<key name="message_0008">The remote shell call: [#!variable!shell_call!#] to: [#!variable!connection!#] failed with the error: [#!variable!error!#].</key> <key name="message_0008">The remote shell call: [#!variable!shell_call!#] to: [#!variable!connection!#] failed with the error: [#!variable!error!#].</key>
@ -2167,6 +2177,7 @@ Read UUID: .... [#!variable!read_uuid!#]
<key name="warning_0070">[ Warning ] - Unable to report the available resources for the Anvil! [#!variable!anvil_name!#] as it looks like ScanCore has not yet run. Please try again after starting the 'scancore' daemon on the nodes.</key> <key name="warning_0070">[ Warning ] - Unable to report the available resources for the Anvil! [#!variable!anvil_name!#] as it looks like ScanCore has not yet run. Please try again after starting the 'scancore' daemon on the nodes.</key>
<key name="warning_0071">[ Warning ] - We were asked to create a new storage group called: [#!variable!name!#] but that name is already used by the group with UUID: [#!variable!uuid!#].</key> <key name="warning_0071">[ Warning ] - We were asked to create a new storage group called: [#!variable!name!#] but that name is already used by the group with UUID: [#!variable!uuid!#].</key>
<key name="warning_0072">[ Warning ] - The file: [#!variable!file_path!#] was not found on any accessible Striker dashboard (or it isn't the same size as recorded in the database). Will sleep for a minute and exit, then we'll try again.</key> <key name="warning_0072">[ Warning ] - The file: [#!variable!file_path!#] was not found on any accessible Striker dashboard (or it isn't the same size as recorded in the database). Will sleep for a minute and exit, then we'll try again.</key>
<key name="warning_0073">[ Warning ] - No databases are available. Some functions of this resource agent will not be available.</key>
<!-- The entries below here are not sequential, but use a key to find the entry. --> <!-- The entries below here are not sequential, but use a key to find the entry. -->
<!-- Run 'striker-parse-os-list to find new entries. --> <!-- Run 'striker-parse-os-list to find new entries. -->

@ -0,0 +1,60 @@
#!/usr/bin/perl
#
# This program is intended to delete a server (virtual machine) from an Anvil! system.
#
# NOTE: This is very early work in progress. At present it only reads and logs its command line switches
#       and then exits without changing anything.
#
# Exit codes;
# 0 = Normal exit.
# 1 = Any problem that causes an early exit.
#
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Term::Cap;
# Work out this program's file name and the directory it was called from. When '$0' has no directory part
# (ie: 'perl anvil-delete-server'), fall back to '$0' itself and to the current directory so that neither
# variable is left undefined.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
$THIS_FILE = $0 if not defined $THIS_FILE;

# The file name is quoted with \Q..\E so that characters like '.' are matched literally.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
$running_directory = "." if not defined $running_directory;
if (($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Convert a leading '.' into the directory we were called from.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off output buffering.
$| = 1;

my $anvil = Anvil::Tools->new();
$anvil->Log->level({set => 2});
$anvil->Log->secure({set => 1});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Set a default (empty string) for each switch this program knows about, so that they are always defined,
# then read in whatever switches were actually passed.
$anvil->data->{switches}{'anvil-uuid'}  = "";
$anvil->data->{switches}{'anvil-name'}  = "";
$anvil->data->{switches}{'job-uuid'}    = "";
$anvil->data->{switches}{'server-name'} = "";
$anvil->data->{switches}{'server-uuid'} = "";
$anvil->Get->switches;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
	'switches::anvil-uuid'  => $anvil->data->{switches}{'anvil-uuid'},
	'switches::anvil-name'  => $anvil->data->{switches}{'anvil-name'},
	'switches::job-uuid'    => $anvil->data->{switches}{'job-uuid'},
	'switches::server-name' => $anvil->data->{switches}{'server-name'},
	'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
}});

# Nothing is actually deleted yet; the switches are logged and then we exit cleanly.
$anvil->nice_exit({exit_code => 0});
#############################################################################################################
# Functions #
#############################################################################################################

@ -6,6 +6,8 @@
# #
# Exit codes; # Exit codes;
# 0 = Normal exit. # 0 = Normal exit.
# 1 = Any problem that causes an early exit.
#
use strict; use strict;
use warnings; use warnings;
@ -257,18 +259,115 @@ sub run_jobs
# If we're here, we can finally craft the 'virt-install' call!. # If we're here, we can finally craft the 'virt-install' call!.
if ($anvil->data->{job}{peer_mode}) if ($anvil->data->{job}{peer_mode})
{ {
# The peer is done, it'll pick up the XML definition when ScanCore runs # Wait until we have seen the definition in the database and written to disk
write_definition($anvil);
}
else
{
provision_server($anvil);
}
# Add the server to the cluster.
add_server_to_cluster($anvil);
# Done!
$anvil->Job->update_progress({ $anvil->Job->update_progress({
progress => 100, progress => 100,
message => "job_0204", message => "job_0202",
}); });
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0204"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0202"});
return(0);
}
# This asks the Cluster module to add the job's server to the cluster. If that fails, the job is marked
# as failed and the program exits with return code '1'. On success, '0' is returned.
sub add_server_to_cluster
{
	my ($anvil) = @_;
	
	# Tell the user (and the log) that we're at the "add to cluster" stage.
	my $stage_key = "job_0207";
	$anvil->Job->update_progress({progress => 90, message => $stage_key});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => $stage_key});
	
	# Hand the server off to the Cluster module. A true return value means something went wrong.
	my $server_name = $anvil->data->{job}{server_name};
	my $problem     = $anvil->Cluster->add_server({server_name => $server_name});
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	
	# All good, nothing more to do here.
	return(0) if not $problem;
	
	# Something went wrong; Record the failure against the job, log it as an error and exit.
	my $error_key = "error_0215";
	$anvil->Job->update_progress({
		progress   => 100,
		message    => $error_key,
		job_status => "failed",
	});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => $error_key});
	$anvil->nice_exit({exit_code => 1});
	
	return(0);
}
# This waits until the server's XML definition can be read from the database and then writes it out to
# the shared definitions directory as '<server_name>.xml'. If the file already exists, no waiting or 
# writing is done. There is no timeout; this loops (checking once per second) until the file exists on 
# disk. When done, the job's progress is set to '100' and '0' is returned.
sub write_definition
{
	my ($anvil) = @_;
	
	my $anvil_uuid  = $anvil->data->{job}{anvil_uuid};
	my $server_name = $anvil->data->{job}{server_name};
	my $xml_file    = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml";
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { xml_file => $xml_file }});
	
	if (not -f $xml_file)
	{
		# Tell the user we're going to wait until we find the server in the database.
		$anvil->Job->update_progress({
			progress => 70,
			message  => "job_0205",
		});
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0205"});
		
		while (not -f $xml_file)
		{
			# Refresh our view of the servers and their definitions.
			$anvil->Database->get_servers();
			$anvil->Database->get_server_definitions();
			
			# The server may not be in the database yet, in which case there is no UUID. Only look
			# for the definition once we have a UUID, so that we never use an undefined value as 
			# a hash key.
			my $server_uuid       = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid};
			my $server_definition = "";
			if ($server_uuid)
			{
				my $definition_xml = $anvil->data->{server_definitions}{server_definition_server_uuid}{$server_uuid}{server_definition_xml};
				$server_definition = $definition_xml if defined $definition_xml;
			}
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_definition => $server_definition }});
			
			if ($server_definition)
			{
				# Write it!
				my $return = $anvil->Storage->write_file({
					body      => $server_definition,
					file      => $xml_file,
					overwrite => 1,
				});
				# Logged whether or not the write worked, so that a failed write can be debugged.
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { 'return' => $return }});
				
				if (-f $xml_file)
				{
					$anvil->Job->update_progress({
						progress => 80,
						message  => "job_0206,!!file!".$xml_file."!!",
					});
					$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0206", variables => { file => $xml_file }});
				}
			}
			
			# Pause before trying again if the file still isn't there. This covers both "definition
			# not in the database yet" and "the write failed", so that we never loop flat out 
			# against the database.
			if (not -f $xml_file)
			{
				sleep 1;
			}
		}
	}
	
	# The definition file is on disk, so this part of the job is complete.
	$anvil->Job->update_progress({
		progress => 100,
		message  => "job_0204",
	});
	$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0204"});
	
	return(0);
}
@ -377,7 +476,7 @@ sub provision_server
} }
$anvil->Job->update_progress({ $anvil->Job->update_progress({
progress => 90, progress => 85,
message => "job_0201", message => "job_0201",
}); });
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0201"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0201"});
@ -448,13 +547,6 @@ sub provision_server
return_code => $return_code, return_code => $return_code,
}}); }});
# Done!
$anvil->Job->update_progress({
progress => 100,
message => "job_0202",
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0202"});
return(0); return(0);
} }

@ -26,8 +26,6 @@ $anvil->Get->switches;
$anvil->Database->connect({debug => 3}); $anvil->Database->connect({debug => 3});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"});
my $anvil_uuid = "a5ae5242-e9d3-46c9-9ce8-306855aa56db"; $anvil->Cluster->add_server({server_name => "srv01-test"});
my ($free_minor, $free_port) = $anvil->DRBD->get_next_resource({anvil_uuid => "a5ae5242-e9d3-46c9-9ce8-306855aa56db"});
print "Next free minor: [".$free_minor."], port: [".$free_port."]\n";
$anvil->nice_exit({exit_code => 0}); $anvil->nice_exit({exit_code => 0});

Loading…
Cancel
Save