diff --git a/Anvil/Tools/Cluster.pm b/Anvil/Tools/Cluster.pm index dc97b877..5e3a5477 100644 --- a/Anvil/Tools/Cluster.pm +++ b/Anvil/Tools/Cluster.pm @@ -1694,7 +1694,7 @@ sub parse_cib if ($anvil->Network->is_local({host => $target})) { # Local call - ($cib_data, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + ($cib_data, $return_code) = $anvil->System->call({debug => ($debug + 1), shell_call => $shell_call}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { cib_data => $cib_data, return_code => $return_code, @@ -1704,7 +1704,7 @@ sub parse_cib { # Remote call. ($cib_data, my $error, $return_code) = $anvil->Remote->call({ - debug => $debug, + debug => ($debug + 1), shell_call => $shell_call, target => $target, port => $port, @@ -2415,7 +2415,7 @@ sub parse_crm_mon if ($anvil->Network->is_local({host => $target})) { # Local call - ($crm_mon_data, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + ($crm_mon_data, $return_code) = $anvil->System->call({debug => ($debug + 1), shell_call => $shell_call}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { crm_mon_data => $crm_mon_data, return_code => $return_code, @@ -2463,7 +2463,7 @@ sub parse_crm_mon foreach my $resource ($dom->findnodes('/pacemaker-result/resources/resource')) { next if $resource->{resource_agent} ne "ocf::alteeve:server"; - my $id = $resource->{id}; + my $id = $resource->{id}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { id => $id }}); foreach my $variable (sort {$a cmp $b} keys %{$resource}) { diff --git a/Anvil/Tools/DRBD.pm b/Anvil/Tools/DRBD.pm index 097631b4..e9aa076d 100644 --- a/Anvil/Tools/DRBD.pm +++ b/Anvil/Tools/DRBD.pm @@ -278,9 +278,9 @@ sub delete_resource $anvil->DRBD->gather_data({debug => $debug}); if (not exists $anvil->data->{new}{resource}{$resource}) { - # Resource not found. 
+ # Resource not found, so it appears to already be gone. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0228", variables => { resource => $resource }}); - return('!!error!!'); + return(0); } my $waiting = 1; @@ -1100,23 +1100,25 @@ sub get_devices =head2 get_next_resource -This returns the next free DRBD minor number and the next free TCP port. The minor number is the first one found to be free. The TCP port is allocated in steps of three. That is to say, if the last used TCP port is '7790', then '7793' is considered the next free port. This is to ensure that if a DR host is added or used, the three adjacent ports are available for use in one resource configuration. +This returns the next free DRBD minor number and the next free TCP port. The minor number and TCP port returned are ones found to be free on both/all machines in the Anvil! system. As such, the returned values may skip values free on any given system. -Minor numbers are not grouped as resources and volumes can be referenced by name, so the DRBD minor number is less important for human users. +If a resource name is given and that resource already exists, this method either returns an error if C<< force_unique >> is set (useful for name conflict checks), or returns the first (lowest) minor number and TCP port used by the resource. my ($free_minor, $free_port) = $anvil->DRBD->get_next_resource({anvil_uuid => "a5ae5242-e9d3-46c9-9ce8-306855aa56db"}) If there is a problem, two empty strings will be returned. +B<< Note >>: Deleted resources, volumes and peers are ignored! As such, a minor or TCP port that used to be used by a deleted resource can be returned. + Parameters; -=head3 anvil_uuid (required) +=head3 anvil_uuid (optional, default 'Cluster->get_anvil_uuid') -This is the Anvil! in which we're looking for the next free resources. +This is the Anvil! in which we're looking for the next free resources. 
It's required, but generally it doesn't need to be specified as we can find it via C<< Cluster->get_anvil_uuid() >>. =head3 resource_name (optional) -If this is set, and the resource is found to already exist, the first DRBD minor number and first used TCP port are returned. Alternatively, if C<< force_unique >> is set to C<< 1 >>, and the resource is found to exist, C<< !!error!! >> is returned. +If this is set, and the resource is found to already exist, the first DRBD minor number and first used TCP port are returned. Alternatively, if C<< force_unique >> is set to C<< 1 >>, and the resource is found to exist, empty strings are returned. =head3 force_unique (optional, default '0') @@ -1131,8 +1133,6 @@ sub get_next_resource my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "DRBD->get_next_resource()" }}); - my $free_minor = ""; - my $free_port = ""; my $anvil_uuid = defined $parameter->{anvil_uuid} ? $parameter->{anvil_uuid} : ""; my $resource_name = defined $parameter->{resource_name} ? $parameter->{resource_name} : ""; my $force_unique = defined $parameter->{force_unique} ? 
$parameter->{force_unique} : 0; @@ -1142,21 +1142,30 @@ sub get_next_resource force_unique => $force_unique, }}); + # If we weren't passed an anvil_uuid, see if we can find one locally + if (not $anvil_uuid) + { + $anvil_uuid = $anvil->Cluster->get_anvil_uuid({debug => $debug}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { anvil_uuid => $anvil_uuid }}); + } + if (not $anvil_uuid) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "DRBD->get_next_resource()", parameter => "anvil_uuid" }}); - return($free_minor, $free_port); + return("", ""); } $anvil->Database->get_anvils({debug => $debug}); if (not exists $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0162", variables => { anvil_uuid => $anvil_uuid }}); - return($free_minor, $free_port); + return("", ""); } # Read in the resource information from both nodes. They _should_ be identical, but that's not 100% # certain. 
+ my $free_minor = ""; + my $free_port = ""; my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; @@ -1166,26 +1175,53 @@ sub get_next_resource dr1_host_uuid => $dr1_host_uuid, }}); - my $query = " +my $query = " SELECT - scan_drbd_resource_host_uuid, - scan_drbd_resource_name, - scan_drbd_resource_xml + a.host_uuid, + a.host_name, + b.scan_drbd_resource_name, + c.scan_drbd_volume_number, + c.scan_drbd_volume_device_path, + c.scan_drbd_volume_device_minor, + d.scan_drbd_peer_host_name, + d.scan_drbd_peer_ip_address, + d.scan_drbd_peer_protocol, + d.scan_drbd_peer_fencing, + d.scan_drbd_peer_tcp_port FROM - scan_drbd_resources + hosts a, + scan_drbd_resources b, + scan_drbd_volumes c, + scan_drbd_peers d WHERE - scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node1_host_uuid)." -OR - scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node2_host_uuid)." "; + a.host_uuid = b.scan_drbd_resource_host_uuid +AND + b.scan_drbd_resource_uuid = c.scan_drbd_volume_scan_drbd_resource_uuid +AND + c.scan_drbd_volume_uuid = d.scan_drbd_peer_scan_drbd_volume_uuid +AND + b.scan_drbd_resource_xml != 'DELETED' +AND + c.scan_drbd_volume_device_path != 'DELETED' +AND + d.scan_drbd_peer_connection_state != 'DELETED' +AND + ( + scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node1_host_uuid)." + OR + scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node2_host_uuid)." "; if ($dr1_host_uuid) { $query .= " -OR - scan_drbd_resource_host_uuid = ".$anvil->Database->quote($dr1_host_uuid)." "; + OR + scan_drbd_resource_host_uuid = ".$anvil->Database->quote($dr1_host_uuid)." 
"; } $query .= " + ) ORDER BY - scan_drbd_resource_name ASC + b.scan_drbd_resource_name ASC, + c.scan_drbd_volume_device_minor ASC, + d.scan_drbd_peer_tcp_port ASC ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); @@ -1197,110 +1233,58 @@ ORDER BY }}); foreach my $row (@{$results}) { - my $scan_drbd_resource_host_uuid = $row->[0]; - my $scan_drbd_resource_name = $row->[1]; - my $scan_drbd_resource_xml = $row->[2]; + # I don't really need most of this, but it helps with debugging + my $host_uuid = $row->[0]; + my $host_name = $row->[1]; + my $scan_drbd_resource_name = $row->[2]; + my $scan_drbd_volume_number = $row->[3]; + my $scan_drbd_volume_device_path = $row->[4]; + my $scan_drbd_volume_device_minor = $row->[5]; + my $scan_drbd_peer_host_name = $row->[6]; + my $scan_drbd_peer_ip_address = $row->[7]; + my $scan_drbd_peer_protocol = $row->[8]; + my $scan_drbd_peer_fencing = $row->[9]; + my $scan_drbd_peer_tcp_port = $row->[10]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - scan_drbd_resource_host_uuid => $scan_drbd_resource_host_uuid, - scan_drbd_resource_name => $scan_drbd_resource_name, - scan_drbd_resource_xml => $scan_drbd_resource_xml, + 's1:host_uuid' => $host_uuid, + 's2:host_name' => $host_name, + 's3:scan_drbd_resource_name' => $scan_drbd_resource_name, + 's4:scan_drbd_volume_number' => $scan_drbd_volume_number, + 's5:scan_drbd_volume_device_path' => $scan_drbd_volume_device_path, + 's6:scan_drbd_volume_device_minor' => $scan_drbd_volume_device_minor, + 's7:scan_drbd_peer_host_name' => $scan_drbd_peer_host_name, + 's8:scan_drbd_peer_ip_address' => $scan_drbd_peer_ip_address, + 's9:scan_drbd_peer_protocol' => $scan_drbd_peer_protocol, + 's10:scan_drbd_peer_fencing' => $scan_drbd_peer_fencing, + 's11:scan_drbd_peer_tcp_port' => $scan_drbd_peer_tcp_port, }}); - next if $scan_drbd_resource_xml eq "DELETED"; - - local $@; - my $dom = eval { 
XML::LibXML->load_xml(string => $scan_drbd_resource_xml); }; - if ($@) - { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "error_0111", variables => { - xml_body => $scan_drbd_resource_xml, - eval_error => $@, - }}); - next; - } - - # Successful parse! - my $local_minor = ""; - my $local_port = ""; - foreach my $name ($dom->findnodes('/resource')) - { - my $resource = $name->{name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource => $resource }}); - - foreach my $host ($name->findnodes('./host')) - { - my $host_name = $host->{name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_name => $host_name }}); - - foreach my $volume_vnr ($host->findnodes('./volume')) - { - my $volume = $volume_vnr->{vnr}; - my $minor = $volume_vnr->findvalue('./device/@minor'); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - 's1:host_name' => $host_name." 
\@ ".$resource."/".$volume, - 's2:minor' => $minor, - }}); - - $anvil->data->{drbd}{used_resources}{minor}{$minor}{used} = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "drbd::used_resources::minor::${minor}::used" => $anvil->data->{drbd}{used_resources}{minor}{$minor}{used}, - }}); - - if (not $local_minor) - { - $local_minor = $minor; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { local_minor => $local_minor }}); - } - } - } - - foreach my $connection ($name->findnodes('./connection')) - { - foreach my $host ($connection->findnodes('./host')) - { - my $host_name = $host->{name}; - my $tcp_port = $host->findvalue('./address/@port'); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - host_name => $host_name, - tcp_port => $tcp_port, - }}); - - $anvil->data->{drbd}{used_resources}{tcp_port}{$tcp_port}{used} = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "drbd::used_resources::tcp_port::${tcp_port}::used" => $anvil->data->{drbd}{used_resources}{tcp_port}{$tcp_port}{used}, - }}); - - if (not $local_port) - { - $local_port = $tcp_port; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { local_port => $local_port }}); - } - } - } - } + $anvil->data->{drbd}{used_resources}{minor}{$scan_drbd_volume_device_minor}{used} = 1; + $anvil->data->{drbd}{used_resources}{tcp_port}{$scan_drbd_peer_tcp_port}{used} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "drbd::used_resources::minor::${scan_drbd_volume_device_minor}::used" => $anvil->data->{drbd}{used_resources}{minor}{$scan_drbd_volume_device_minor}{used}, + "drbd::used_resources::tcp_port::${scan_drbd_peer_tcp_port}::used" => $anvil->data->{drbd}{used_resources}{tcp_port}{$scan_drbd_peer_tcp_port}{used}, + }}); - # Is the user looking for 
this resource? - if (($resource_name) && ($resource_name eq $scan_drbd_resource_name)) + if (($resource_name) && ($scan_drbd_resource_name eq $resource_name)) { - # If we're force_unique, error. + # Found the resource the user was asking for. if ($force_unique) { # Error out. - return('!!error!!'); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => 'err', key => "error_0237", variables => { resource_name => $resource_name }}); + return("", ""); } else { - $free_minor = $local_minor; - $free_port = $local_port; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - free_minor => $free_minor, - free_port => $free_port, - }}); - return($free_minor, $free_port); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0592", variables => { resource_name => $resource_name }}); + return($scan_drbd_volume_device_minor, $scan_drbd_peer_tcp_port); } } } + # If I'm here, I need to find the next free TCP port. We'll look for the next minor number for this + # host. my $looking = 1; $free_minor = 0; while($looking) @@ -1323,7 +1307,7 @@ ORDER BY { if (exists $anvil->data->{drbd}{used_resources}{tcp_port}{$free_port}) { - $free_port += 3; + $free_port++; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { free_port => $free_port }}); } else diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index b7f4e940..7c3e851e 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -2510,6 +2510,10 @@ Parameters; This is the host UUID we're querying the name of. +=head3 include_deleted (optional, default '0') + +If set to C<< 1 >>, hosts that are deleted are included. If you use this, and a machine was replaced, then watch for multiple host UUIDs. + =head3 short (optional, default '0') If set to C<< 1 >>, the short host name is returned. When set to C<< 0 >>, the full host name is returned. 
@@ -2523,12 +2527,14 @@ sub get_host_from_uuid my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->get_host_from_uuid()" }}); - my $host_name = ""; - my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : ""; - my $short = defined $parameter->{short} ? $parameter->{short} : 0; + my $host_name = ""; + my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : ""; + my $include_deleted = defined $parameter->{include_deleted} ? $parameter->{include_deleted} : 0; + my $short = defined $parameter->{short} ? $parameter->{short} : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - host_uuid => $host_uuid, - short => $short, + host_uuid => $host_uuid, + include_deleted => $include_deleted, + short => $short, }}); if (not $host_uuid) @@ -2538,7 +2544,21 @@ sub get_host_from_uuid return($host_name); } - my $query = "SELECT host_name FROM hosts WHERE host_uuid = ".$anvil->Database->quote($host_uuid).";"; + my $query = " +SELECT + host_name +FROM + hosts +WHERE + host_uuid = ".$anvil->Database->quote($host_uuid); + if (not $include_deleted) + { + $query .= " +AND + host_key != 'DELETED'"; + } + $query .= " +;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); @@ -2592,7 +2612,13 @@ And to simplify look-ups by UUID or name; To prevent some cases of recursion, C<< hosts::loaded >> is set on successful load, and if this is set, this method immediately returns with C<< 0 >>. -This method takes no parameters. +Parameters; + +=head3 include_deleted (optional, default '0') + +By default, hosts that have been deleted (C<< host_key >> set to C<< DELETED >>) are not returned. 
If this is set to C<< 1 >>, those deleted hosts are included. + +B<< Note >>: Be careful when using this. If a machine was replaced, then there could be two (or more) host UUIDs for a given host name. =cut sub get_hosts @@ -2603,6 +2629,11 @@ sub get_hosts my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->get_hosts()" }}); + my $include_deleted = defined $parameter->{include_deleted} ? $parameter->{include_deleted} : 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + include_deleted => $include_deleted, + }}); + # Delete any data from past scans. delete $anvil->data->{hosts}{host_uuid}; delete $anvil->data->{sys}{hosts}{by_uuid}; @@ -2620,7 +2651,14 @@ SELECT host_ipmi, modified_date FROM - hosts + hosts "; + if (not $include_deleted) + { + $query .= " +WHERE + host_key != 'DELETED'"; + } + $query .= " ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); diff --git a/Anvil/Tools/Job.pm b/Anvil/Tools/Job.pm index 56dcfc00..3c1bfa62 100644 --- a/Anvil/Tools/Job.pm +++ b/Anvil/Tools/Job.pm @@ -356,9 +356,10 @@ FROM WHERE job_command LIKE ".$anvil->Database->quote("%".$program."%")." AND - job_progress != '100' + job_progress = 0 AND job_host_uuid = ".$anvil->Database->quote($host_uuid)." +LIMIT 1 ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); diff --git a/share/words.xml b/share/words.xml index d3674532..0835efa6 100644 --- a/share/words.xml +++ b/share/words.xml @@ -310,7 +310,7 @@ Output (if any): Unable to delete the server resource: [#!variable!server_name!#] as this node is not (yet) a full member of the cluster. 
It looks like to removal of the server resource: [#!variable!server_name!#] failed. The return code should have been '0', but: [#!variable!return_code!#] was returned. The 'pcs' command output, if any, was: [#!variable!output!#]. It looks like to removal of the server resource: [#!variable!server_name!#] failed. Unsafe to proceed with the removal of the server. Please check the logs for more information. - Unable to delete the resource: [#!variable!resource!#] because it wasn't found in DRBD's config. + Unable to delete the resource: [#!variable!resource!#] because it wasn't found in DRBD's config. This can happen if a previous delete partially completed, in which case this is not a problem. One or more peers need us, and we're not allowed to wait. Deletion aborted. The shell call: [#!variable!shell_call!#] was expected to return '0', but instead the return code: [#!variable!return_code!#] was received. The output, if any, was: [#!variable!output!#]. This host is not an Anvil! node or DR host, unable to migrate servers. @@ -319,6 +319,8 @@ Output (if any): Unable to find the target host to migrate to the job UUID: [#!variable!job_uuid!#]. The migration target host: [#!variable!target_host_uuid!#] is either invalid, or doesn't match one of the nodes in this Anvil! system. There appears to be no resource data in the database for the host: [#!variable!host_name!#]. Has ScanCore run and, specifically, has 'scan-hardware' run yet? Unable to provide available resources for this Anvil! system. + The resource name: [#!variable!resource_name!#] already exists, and 'force_unique' is set. This is likely a name conflict, returning empty strings. + This node is not yet fully in the cluster. Sleeping for a bit, then we'll exit. The job will try again shortly after. @@ -657,6 +659,9 @@ It should be provisioned in the next minute or two. Manually calling 'scan-drbd' to ensure that the new agent is recorded. 
The server name: [#!variable!server_name!#] is already used by another server. Deleting the server's definition file: [#!variable!file!#]... + The server: [#!variable!server_name!#] was not found in the cluster configuration. This can happen if a server was partially deleted and we're trying again. + Preparing to delete the server: [#!variable!server_name!#]. + Using virsh to destroy (force off) the server: [#!variable!server_name!#], if it is still running. Starting: [#!variable!program!#]. @@ -1342,6 +1347,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Deleting the file: [#!variable!file!#]. Wiping the metadata from the DRBD resource: [#!variable!resource!#]. Wiping any file system signatures and then deleting the logical volume: [#!variable!device_path!#]. + The resource name: [#!variable!resource_name!#] was found, returning the first TCP port and minor number. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-delete-server b/tools/anvil-delete-server index cf716ef5..0fee1e94 100755 --- a/tools/anvil-delete-server +++ b/tools/anvil-delete-server @@ -26,23 +26,21 @@ if (($running_directory =~ /^\./) && ($ENV{PWD})) $| = 1; my $anvil = Anvil::Tools->new(); -$anvil->Log->level({set => 2}); -$anvil->Log->secure({set => 1}); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); # Read switches (target ([user@]host[:port]) and the file with the target's password. If the password is # passed directly, it will be used. Otherwise, the password will be read from the database. 
$anvil->data->{switches}{'job-uuid'} = ""; $anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'} }}); $anvil->Database->connect(); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { # No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try # again after we exit. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0218"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, 'print' => 1, level => 0, priority => "err", key => "error_0218"}); sleep 10; $anvil->nice_exit({exit_code => 1}); } @@ -54,14 +52,14 @@ if (not $anvil->data->{sys}{database}{connections}) # and, if available, the DR host. At this point, the job acts the same regardless of the host. The DRBD # resource will stopped and then have it's metadata wiped, The LV backing the device will be deleted next. -$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid({debug => 2}); +$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'sys::anvil_uuid' => $anvil->data->{sys}{anvil_uuid} }}); # If we don't have a job UUID, try to find one. if (not $anvil->data->{switches}{'job-uuid'}) { # Load the job data. 
- $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({program => $THIS_FILE}); + $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({debug => 2, program => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }}); } @@ -93,7 +91,7 @@ if ($anvil->data->{switches}{'job-uuid'}) } # Log an exit. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); $anvil->nice_exit({exit_code => 1}); } @@ -105,7 +103,7 @@ else if (not $anvil->data->{sys}{anvil_uuid}) { # We can't do anything, exit. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); $anvil->nice_exit({exit_code => 1}); } @@ -144,11 +142,17 @@ sub run_jobs remove_from_pacemaker($anvil); } + $anvil->Job->update_progress({ + progress => 25, + message => "job_0222,!!server_name!".$server_name."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0222", variables => { server_name => $server_name }}); + $anvil->Job->update_progress({ progress => 50, message => "job_0213", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0213"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0213"}); ### NOTE: If we're a DR host, and the server wasn't used here, this is expected to fail # Delete the DRBD resource and backing storage @@ -162,7 +166,7 @@ sub run_jobs message => "error_0228,!!resource!".$server_name."!!", 
job_status => "failed", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0228", variables => { resource => $server_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0228", variables => { resource => $server_name }}); $anvil->nice_exit({exit_code => 1}); } @@ -170,7 +174,7 @@ sub run_jobs progress => 60, message => "job_0214", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0214"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0214"}); # Make sure the server is flagged as DELETEd. $anvil->Database->get_servers(); @@ -184,7 +188,8 @@ SET server_state = 'DELETED', modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})." WHERE - server_uuid = ".$anvil->Database->quote($server_uuid).";"; + server_uuid = ".$anvil->Database->quote($server_uuid)." +;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { query => $query }}); $anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__}); @@ -192,7 +197,7 @@ WHERE progress => 70, message => "job_0215", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0215"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0215"}); } # Delete the XML definition file. 
@@ -205,7 +210,7 @@ WHERE progress => 80, message => "job_0220,!!file!".$resource_file."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0220", variables => { file => $resource_file }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0220", variables => { file => $resource_file }}); unlink $resource_file; } @@ -213,7 +218,7 @@ WHERE progress => 100, message => "job_0216", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0216"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0216"}); return(0); } @@ -235,46 +240,110 @@ sub remove_from_pacemaker progress => 10, message => "job_0210,!!server_name!".$server_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0210", variables => { server_name => $server_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0210", variables => { server_name => $server_name }}); - my $problem = $anvil->Cluster->shutdown_server({ - debug => 2, - server => $server_name, - 'wait' => 1, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - if ($problem) + if (not $anvil->data->{cib}{parsed}{data}{server}{$server_name}) { - # Failed to stop. + # Server is already out of the cluster. 
$anvil->Job->update_progress({ - progress => 100, - message => "error_0223,!!server_name!".$server_name."!!", - job_status => "failed", + progress => 20, + message => "job_0221,!!server_name!".$server_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); - $anvil->nice_exit({exit_code => 1}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0211", variables => { server_name => $server_name }}); + + # Force the server off now, just in case it's running outside the cluster + $anvil->Job->update_progress({ + progress => 25, + message => "job_0223,!!server_name!".$server_name."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0223", variables => { server_name => $server_name }}); + my $success = $anvil->Server->shutdown_virsh({ + debug => 2, + force => 1, + 'wait' => 1, + server => $server_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); + if (not $success) + { + # Failed to stop + $anvil->Job->update_progress({ + progress => 100, + message => "error_0223,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } } - - # Server is off now. - $anvil->Job->update_progress({ - progress => 20, - message => "job_0211,!!server_name!".$server_name."!!", - }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0211", variables => { server_name => $server_name }}); - - # Delete the resource. 
- $problem = $anvil->Cluster->delete_server({debug => 2, server_name => $server_name}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - if ($problem) + elsif ($anvil->data->{cib}{parsed}{data}{server}{$server_name}{status} ne "off") { - # Something went wrong + # As we're going to delete the server, we won't wait. We'll come back here and destroy the + # server if it's still running. + my $problem = $anvil->Cluster->shutdown_server({ + debug => 2, + server => $server_name, + 'wait' => 0, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Failed to stop. + $anvil->Job->update_progress({ + progress => 100, + message => "error_0223,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } + + # Force the server off now. 
$anvil->Job->update_progress({ - progress => 100, - message => "error_0227,!!server_name!".$server_name."!!", - job_status => "failed", + progress => 20, + message => "job_0223,!!server_name!".$server_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0227", variables => { server_name => $server_name }}); - $anvil->nice_exit({exit_code => 1}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0223", variables => { server_name => $server_name }}); + my $success = $anvil->Server->shutdown_virsh({ + debug => 2, + force => 1, + 'wait' => 1, + server => $server_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); + if (not $success) + { + # Failed to stop + $anvil->Job->update_progress({ + progress => 100, + message => "error_0223,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } + + # Server is off now. + $anvil->Job->update_progress({ + progress => 25, + message => "job_0211,!!server_name!".$server_name."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0211", variables => { server_name => $server_name }}); + + # Delete the resource. 
+ $problem = $anvil->Cluster->delete_server({debug => 2, server_name => $server_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Something went wrong + $anvil->Job->update_progress({ + progress => 100, + message => "error_0227,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0227", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } } # Register the job with the peers. @@ -315,7 +384,7 @@ sub remove_from_pacemaker progress => $progress, message => "job_0212,!!host_name!".$host_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0212", variables => { host_name => $host_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0212", variables => { host_name => $host_name }}); $progress += 10; } @@ -352,7 +421,7 @@ sub parse_job_data message => "error_0219,!!job_uuid!".$anvil->data->{switches}{'job-uuid'}."!!", job_status => "failed", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0219", variables => { job_uuid => $anvil->data->{switches}{'job-uuid'} }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0219", variables => { job_uuid => $anvil->data->{switches}{'job-uuid'} }}); $anvil->nice_exit({exit_code => 1}); } @@ -367,7 +436,7 @@ sub parse_job_data message => "error_0220,!!server_uuid!".$server_uuid."!!", job_status => "failed", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0220", variables => { server_uuid => $server_uuid }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' 
=> 1, level => 0, priority => 'err', key => "error_0220", variables => { server_uuid => $server_uuid }}); $anvil->nice_exit({exit_code => 1}); } @@ -380,10 +449,21 @@ sub parse_job_data { # The cluster isn't running, sleep and exit. $anvil->Job->update_progress({ - progress => 0, + progress => 10, message => "error_0222", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0222"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0222"}); + sleep 10; + $anvil->nice_exit({exit_code => 1}); + } + elsif (not $anvil->data->{cib}{parsed}{'local'}{ready}) + { + # We're not a full member (yet) + $anvil->Job->update_progress({ + progress => 10, + message => "error_0238", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0238"}); sleep 10; $anvil->nice_exit({exit_code => 1}); } diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 025907e5..1005bcf2 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -8,6 +8,9 @@ # 0 = Normal exit. # 1 = Any problem that causes an early exit. 
# +# TODO: Support cloning; Example +# - virt-clone --original-xml /mnt/shared/definitions/.xml --name --file --check path_exists=off +# use strict; use warnings; @@ -1067,28 +1070,19 @@ sub check_drbd_minor_and_port $anvil->nice_exit({exit_code => 1}); } - if ((($anvil->data->{job}{drbd_minor} eq "") or ($anvil->data->{job}{drbd_tcp_port} eq "")) && (not $anvil->data->{job}{peer_mode})) + if (not $anvil->data->{job}{peer_mode}) { - my ($free_minor, $free_port) = $anvil->DRBD->get_next_resource({ + # We're primary, so query the minor number and TCP port + # The peer must use the TCP and minor as set in the job + ($anvil->data->{job}{drbd_minor}, $anvil->data->{job}{drbd_tcp_port}) = $anvil->DRBD->get_next_resource({ debug => 2, anvil_uuid => $anvil->data->{job}{anvil_uuid}, resource_name => $anvil->data->{job}{server_name}, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - free_minor => $free_minor, - free_port => $free_port, + 'job::drbd_minor' => $anvil->data->{job}{drbd_minor}, + 'job::drbd_tcp_port' => $anvil->data->{job}{drbd_tcp_port}, }}); - - if ($anvil->data->{job}{drbd_minor} eq "") - { - $anvil->data->{job}{drbd_minor} = $free_minor; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'job::drbd_minor' => $anvil->data->{job}{drbd_minor} }}); - } - if ($anvil->data->{job}{drbd_tcp_port} eq "") - { - $anvil->data->{job}{drbd_tcp_port} = $free_port; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'job::drbd_tcp_port' => $anvil->data->{job}{drbd_tcp_port} }}); - } } # If we don't have a DRBD minor or TCP port, we're stuck. @@ -1614,10 +1608,8 @@ sub interactive_ask_server_name }}); if ($server_state eq "DELETED") { - print "- ".$server_name." ("..")\n"; - print $anvil->Words->string({key => "message_0220", variables => { - server_name => $server_name, - }})."\n"; + ### NOTE: This could get cluttered, so for now we'll not show them. 
+ #print $anvil->Words->string({key => "message_0220", variables => { server_name => $server_name }})."\n"; } else { diff --git a/tools/test.pl b/tools/test.pl index 02512052..98a94eb8 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -26,44 +26,7 @@ $anvil->Get->switches; $anvil->Database->connect({debug => 3}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); -my $key_string = 'scan_drbd_message_0007,!!resource_name!srv00-sql1!!,!!resource_state!#!string!scan_drbd_unit_0004!#!!,!!resource_xml!] from key string: [scan_drbd_message_0007,!!resource_name!srv00-sql1!!,!!resource_state!#!string!scan_drbd_unit_0004!#!!,!!resource_xml! - - - /dev/drbd_srv00-sql1_0 - /dev/mk-a02n01_ssd0/srv00-sql1_0 - internal - -
(null)
-
- - - /dev/drbd_srv00-sql1_0 - /dev/mk-a02n02_ssd0/srv00-sql1_0 - internal - -
(null)
-
- -
10.101.12.1
-
10.101.12.2
-
-
-
-
-
-
!!'; -my $out_string = $anvil->Words->parse_banged_string({ - debug => 2, - key_string => $key_string, -}); - -print "Got: -==== -".$out_string." -==== -"; +my ($minor, $tcp_port) = $anvil->DRBD->get_next_resource({debug => 2}); +print "Next free minor: [".$minor."], tcp port: [".$tcp_port."]\n"; $anvil->nice_exit({exit_code => 0});