From 86228e9d1da9d8381bf51e57143c5a6c953c51e1 Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 1 Feb 2021 12:06:40 -0500 Subject: [PATCH 1/4] * Added a check to anvil-delete-server to remove the XML definition file. * Added checks to anvil-provision-server to see if an existing server name is flagged as DELETED, instead of outright rejecting a given server name. Signed-off-by: Digimer --- share/words.xml | 4 ++ tools/anvil-delete-server | 14 +++++ tools/anvil-provision-server | 108 ++++++++++++++++++++++++++++------- tools/test.pl | 38 ++++++++++-- 4 files changed, 138 insertions(+), 26 deletions(-) diff --git a/share/words.xml b/share/words.xml index 9c909832..d3674532 100644 --- a/share/words.xml +++ b/share/words.xml @@ -655,6 +655,8 @@ It should be provisioned in the next minute or two. The server delete is complete on this host! It looks like ScanCore has not yet run on one or both nodes in this Anvil! system. Missing resource data, so unable to proceed. Manually calling 'scan-drbd' to ensure that the new agent is recorded. + The server name: [#!variable!server_name!#] is already used by another server. + Deleting the server's definition file: [#!variable!file!#]... Starting: [#!variable!program!#]. @@ -1654,6 +1656,8 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty The server is running here, assigning the job to this host. Preparing to delete a server. Preparing to migrate a server (or all servers). + - #!variable!server_name!# (Current state: [#!variable!server_state!#]) + - * #!variable!server_name!# (Deleted, name can be reused) Saved the mail server information successfully! diff --git a/tools/anvil-delete-server b/tools/anvil-delete-server index c2e26831..cf716ef5 100755 --- a/tools/anvil-delete-server +++ b/tools/anvil-delete-server @@ -195,6 +195,20 @@ WHERE $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0215"}); } + # Delete the XML definition file. 
+ my $resource_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_file => $resource_file }}); + if (-f $resource_file) + { + # Remove it. + $anvil->Job->update_progress({ + progress => 80, + message => "job_0220,!!file!".$resource_file."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0220", variables => { file => $resource_file }}); + unlink $resource_file; + } + $anvil->Job->update_progress({ progress => 100, message => "job_0216", diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 813852d2..025907e5 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -423,8 +423,8 @@ sub provision_server my ($handle, $return_code) = $anvil->System->call({ background => 1, shell_call => $shell_call, - stdout_file => "/var/log/anvil_server_".$server.".stdout", - stderr_file => "/var/log/anvil_server_".$server.".stderr", + stdout_file => "/var/log/anvil-server_".$server.".stdout", + stderr_file => "/var/log/anvil-server_".$server.".stderr", }); my $pid = $handle->pid(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { @@ -1236,14 +1236,24 @@ sub parse_job_data }}); if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server}) { - # Duplicate name - $anvil->Job->update_progress({ - progress => 100, - message => "error_0198,!!server_name!".$anvil->data->{job}{server_name}."!!", - job_status => "failed", - }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0198", variables => { server_name => $anvil->data->{job}{server_name} }}); - $anvil->nice_exit({exit_code => 1}); + # Is this name used by a server marked as DELETED? 
+ my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server}{server_uuid}; + my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_uuid => $server_uuid, + server_state => $server_state, + }}); + if ($server_state ne "DELETED") + { + # Duplicate name + $anvil->Job->update_progress({ + progress => 100, + message => "error_0198,!!server_name!".$anvil->data->{job}{server_name}."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0198", variables => { server_name => $anvil->data->{job}{server_name} }}); + $anvil->nice_exit({exit_code => 1}); + } } if (not $anvil->data->{job}{server_name}) @@ -1564,7 +1574,8 @@ sub interactive_ask_server_name $anvil->Database->get_servers({debug => 2}); ### TODO: Figure out how many rows we have and break the server list into columns if too long. - my $retry = 0; + my $retry = 0; + my $duplicate = ""; while(1) { my $default = ""; @@ -1580,13 +1591,41 @@ sub interactive_ask_server_name # Show all the current server names. 
if ($retry) { - print $anvil->Words->string({key => "job_0159"})."\n\n"; + if ($duplicate) + { + print $anvil->Words->string({key => "job_0219", variables => { server_name => $duplicate }})."\n\n"; + $duplicate = ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { duplicate => $duplicate }}); + } + else + { + print $anvil->Words->string({key => "job_0159"})."\n\n"; + } } my $anvil_uuid = $anvil->data->{new_server}{anvil_uuid}; print $anvil->Words->string({key => "job_0160", variables => { anvil_name => $anvil->data->{new_server}{anvil_name} }})."\n"; foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}}) { - print "- ".$server_name."\n"; + my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid}; + my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_uuid => $server_uuid, + server_state => $server_state, + }}); + if ($server_state eq "DELETED") + { + print "- ".$server_name." ("..")\n"; + print $anvil->Words->string({key => "message_0220", variables => { + server_name => $server_name, + }})."\n"; + } + else + { + print $anvil->Words->string({key => "message_0219", variables => { + server_name => $server_name, + server_state => $server_state, + }})."\n"; + } } print $terminal->Tgoto('cm', 0, 3)."? "; @@ -1602,14 +1641,41 @@ sub interactive_ask_server_name # Reload in case a new anvil! was saved while we waited. $anvil->Database->get_servers(); - if (($answer) && (not exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$answer})) + if ($answer) { - # Valid. - $anvil->data->{new_server}{name} = $answer; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new_server::name" => $anvil->data->{new_server}{name}, - }}); - + # Duplicate? 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }}); + if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$answer}) + { + my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$answer}{server_uuid}; + my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_uuid => $server_uuid, + server_state => $server_state, + }}); + if ($server_state eq "DELETED") + { + # Valid, we can re-use deleted server names. + $anvil->data->{new_server}{name} = $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::name" => $anvil->data->{new_server}{name}, + }}); + } + else + { + # Invalid, duplicate. + $duplicate = $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { duplicate => $duplicate }}); + } + } + else + { + # Valid. + $anvil->data->{new_server}{name} = $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::name" => $anvil->data->{new_server}{name}, + }}); + } last; } else @@ -2218,7 +2284,7 @@ sub interactive_ask_server_os }}); # Still here? - $os_list .= " - [".sprintf("%-10s", $os_code)."] - ".$os_name."\n"; + $os_list .= " - ".sprintf("%-10s", $os_code)." 
- ".$os_name."\n"; } my $retry = 0; diff --git a/tools/test.pl b/tools/test.pl index 0b314e82..1a59d91d 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -26,10 +26,38 @@ $anvil->Get->switches; $anvil->Database->connect({debug => 3}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); -my ($free_minor, $free_port) = $anvil->DRBD->get_next_resource({debug => 2, anvil_uuid => "1aded871-fcb1-4473-9b97-6e9c246fc568"}); -$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - free_minor => $free_minor, - free_port => $free_port, -}}); +my $key_string = 'message_0190 +job_0185 +job_0186,!!minor!5!!,!!port!7803!! +job_0188,!!job_uuid!12eeded2-c5bb-4295-8c8e-665bd9c9b83a!!,!!peer_name!mk-a02n01.digimer.ca!! +job_0189,!!lv_path!/dev/mk-a02n02_ssd0/srv02-lab02_0!! +job_0218 +job_0190,!!resource!srv02-lab02!! +job_0191,!!resource!srv02-lab02!! +job_0192 +job_0195 +job_0203,!!resource!srv02-lab02!! +job_0199,!!shell_call!/usr/bin/virt-install --connect qemu:///system \ +--name srv02-lab02 \ + --os-variant win2k19 \ + --memory 8192 \ + --events on_poweroff=destroy,on_reboot=restart \ + --vcpus 6,sockets=1,cores=6 \ + --cpu host \ + --network bridge=ifn1_bridge1,model=virtio \ + --graphics spice \ + --sound ich9 \ + --clock offset=localtime \ + --boot menu=on \ + --disk path=/dev/drbd/by-res/srv02-lab02/0,target.bus=virtio,driver.io=threads,cache=writeback,driver.discard=unmap,boot.order=1 \ + --disk path=/mnt/shared/files/Windows_Server_2019_eval.iso,device=cdrom,shareable=on,boot.order=2 \ + --disk path=/mnt/shared/files/virtio-win-0.1.185.iso,device=cdrom,shareable=on,boot.order=3 --force \ + --noautoconsole --wait -1 > /var/log/anvil-server_srv02-lab02.log +!! 
+job_0200'; +my ($free_minor, $free_port) = $anvil->Words->parse_banged_string({ + debug => 2, + key_string => $key_string, +}); $anvil->nice_exit({exit_code => 0}); From 37085754857254519b0960f47078223fdc1dfd19 Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 1 Feb 2021 12:56:46 -0500 Subject: [PATCH 2/4] * Added a check to anvil-delete-server to remove the XML definition file. * Added checks to anvil-provision-server to see if an existing server name is flagged as DELETED, instead of outright rejecting a given server name. Signed-off-by: Digimer --- share/words.xml | 4 ++ tools/anvil-delete-server | 14 +++++ tools/anvil-provision-server | 108 ++++++++++++++++++++++++++++------- 3 files changed, 105 insertions(+), 21 deletions(-) diff --git a/share/words.xml b/share/words.xml index 9c909832..d3674532 100644 --- a/share/words.xml +++ b/share/words.xml @@ -655,6 +655,8 @@ It should be provisioned in the next minute or two. The server delete is complete on this host! It looks like ScanCore has not yet run on one or both nodes in this Anvil! system. Missing resource data, so unable to proceed. Manually calling 'scan-drbd' to ensure that the new agent is recorded. + The server name: [#!variable!server_name!#] is already used by another server. + Deleting the server's definition file: [#!variable!file!#]... Starting: [#!variable!program!#]. @@ -1654,6 +1656,8 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty The server is running here, assigning the job to this host. Preparing to delete a server. Preparing to migrate a server (or all servers). + - #!variable!server_name!# (Current state: [#!variable!server_state!#]) + - * #!variable!server_name!# (Deleted, name can be reused) Saved the mail server information successfully! 
diff --git a/tools/anvil-delete-server b/tools/anvil-delete-server index c2e26831..cf716ef5 100755 --- a/tools/anvil-delete-server +++ b/tools/anvil-delete-server @@ -195,6 +195,20 @@ WHERE $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0215"}); } + # Delete the XML definition file. + my $resource_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server_name.".xml"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { resource_file => $resource_file }}); + if (-f $resource_file) + { + # Remove it. + $anvil->Job->update_progress({ + progress => 80, + message => "job_0220,!!file!".$resource_file."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0220", variables => { file => $resource_file }}); + unlink $resource_file; + } + $anvil->Job->update_progress({ progress => 100, message => "job_0216", diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 813852d2..025907e5 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -423,8 +423,8 @@ sub provision_server my ($handle, $return_code) = $anvil->System->call({ background => 1, shell_call => $shell_call, - stdout_file => "/var/log/anvil_server_".$server.".stdout", - stderr_file => "/var/log/anvil_server_".$server.".stderr", + stdout_file => "/var/log/anvil-server_".$server.".stdout", + stderr_file => "/var/log/anvil-server_".$server.".stderr", }); my $pid = $handle->pid(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { @@ -1236,14 +1236,24 @@ sub parse_job_data }}); if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server}) { - # Duplicate name - $anvil->Job->update_progress({ - progress => 100, - message => "error_0198,!!server_name!".$anvil->data->{job}{server_name}."!!", - job_status => "failed", - }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 
0, priority => 'err', key => "error_0198", variables => { server_name => $anvil->data->{job}{server_name} }}); - $anvil->nice_exit({exit_code => 1}); + # Is this name used by a server marked as DELETED? + my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server}{server_uuid}; + my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_uuid => $server_uuid, + server_state => $server_state, + }}); + if ($server_state ne "DELETED") + { + # Duplicate name + $anvil->Job->update_progress({ + progress => 100, + message => "error_0198,!!server_name!".$anvil->data->{job}{server_name}."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0198", variables => { server_name => $anvil->data->{job}{server_name} }}); + $anvil->nice_exit({exit_code => 1}); + } } if (not $anvil->data->{job}{server_name}) @@ -1564,7 +1574,8 @@ sub interactive_ask_server_name $anvil->Database->get_servers({debug => 2}); ### TODO: Figure out how many rows we have and break the server list into columns if too long. - my $retry = 0; + my $retry = 0; + my $duplicate = ""; while(1) { my $default = ""; @@ -1580,13 +1591,41 @@ sub interactive_ask_server_name # Show all the current server names. 
if ($retry) { - print $anvil->Words->string({key => "job_0159"})."\n\n"; + if ($duplicate) + { + print $anvil->Words->string({key => "job_0219", variables => { server_name => $duplicate }})."\n\n"; + $duplicate = ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { duplicate => $duplicate }}); + } + else + { + print $anvil->Words->string({key => "job_0159"})."\n\n"; + } } my $anvil_uuid = $anvil->data->{new_server}{anvil_uuid}; print $anvil->Words->string({key => "job_0160", variables => { anvil_name => $anvil->data->{new_server}{anvil_name} }})."\n"; foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}}) { - print "- ".$server_name."\n"; + my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid}; + my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_uuid => $server_uuid, + server_state => $server_state, + }}); + if ($server_state eq "DELETED") + { + print "- ".$server_name." ("..")\n"; + print $anvil->Words->string({key => "message_0220", variables => { + server_name => $server_name, + }})."\n"; + } + else + { + print $anvil->Words->string({key => "message_0219", variables => { + server_name => $server_name, + server_state => $server_state, + }})."\n"; + } } print $terminal->Tgoto('cm', 0, 3)."? "; @@ -1602,14 +1641,41 @@ sub interactive_ask_server_name # Reload in case a new anvil! was saved while we waited. $anvil->Database->get_servers(); - if (($answer) && (not exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$answer})) + if ($answer) { - # Valid. - $anvil->data->{new_server}{name} = $answer; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "new_server::name" => $anvil->data->{new_server}{name}, - }}); - + # Duplicate? 
+ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }}); + if (exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$answer}) + { + my $server_uuid = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$answer}{server_uuid}; + my $server_state = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_state}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_uuid => $server_uuid, + server_state => $server_state, + }}); + if ($server_state eq "DELETED") + { + # Valid, we can re-use deleted server names. + $anvil->data->{new_server}{name} = $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::name" => $anvil->data->{new_server}{name}, + }}); + } + else + { + # Invalid, duplicate. + $duplicate = $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { duplicate => $duplicate }}); + } + } + else + { + # Valid. + $anvil->data->{new_server}{name} = $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "new_server::name" => $anvil->data->{new_server}{name}, + }}); + } last; } else @@ -2218,7 +2284,7 @@ sub interactive_ask_server_os }}); # Still here? - $os_list .= " - [".sprintf("%-10s", $os_code)."] - ".$os_name."\n"; + $os_list .= " - ".sprintf("%-10s", $os_code)." - ".$os_name."\n"; } my $retry = 0; From 4b9ec561067d3948903719b3747a1d0469d468d4 Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 2 Feb 2021 23:09:47 -0500 Subject: [PATCH 3/4] * Updated DRBD->delete_resource() to return a success if asked to delete a non-existent resource (as can happen when partial anvil-delete-server runs are re-run). * Reworked DRBD->get_next_resource() to pull from the database, and to no longer do that increments-of-three nonsense. Avoidable complexity. 
Also added a call to Cluster->get_anvil_uuid() if the 'anvil_uuid' parameter wasn't passed. * Updated Database->get_host_from_uuid() and ->get_hosts() to now take an 'include_deleted' parameter and default to not returning deleted hosts. This fixed issues where anvil-{delete,provision}-server calls could assign jobs to now-deleted hosts with reused host names. * Updated anvil-delete-server to print log entries to STDOUT. Also updated it to not wait for the shutdown of a server in pacemaker to complete, and instead to destroy it after calling pacemaker's resource stop. Updated to also check to see if the server being deleted is already out of pacemaker and, if so, skip that step and directly try to destroy the server, if it's running. * Updated anvil-provision-server to force 'peer_mode' runs to pull their TCP Port and DRBD minor numbers from the job. This fixes a bug where the same resource on two machines could use different TCP ports. Signed-off-by: Digimer --- Anvil/Tools/Cluster.pm | 8 +- Anvil/Tools/DRBD.pm | 208 ++++++++++++++++------------------- Anvil/Tools/Database.pm | 54 +++++++-- Anvil/Tools/Job.pm | 3 +- share/words.xml | 8 +- tools/anvil-delete-server | 186 ++++++++++++++++++++++--------- tools/anvil-provision-server | 30 ++--- tools/test.pl | 41 +------ 8 files changed, 301 insertions(+), 237 deletions(-) diff --git a/Anvil/Tools/Cluster.pm b/Anvil/Tools/Cluster.pm index dc97b877..5e3a5477 100644 --- a/Anvil/Tools/Cluster.pm +++ b/Anvil/Tools/Cluster.pm @@ -1694,7 +1694,7 @@ sub parse_cib if ($anvil->Network->is_local({host => $target})) { # Local call - ($cib_data, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + ($cib_data, $return_code) = $anvil->System->call({debug => ($debug + 1), shell_call => $shell_call}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { cib_data => $cib_data, return_code => $return_code, @@ -1704,7 +1704,7 @@ sub parse_cib { # Remote call. 
($cib_data, my $error, $return_code) = $anvil->Remote->call({ - debug => $debug, + debug => ($debug + 1), shell_call => $shell_call, target => $target, port => $port, @@ -2415,7 +2415,7 @@ sub parse_crm_mon if ($anvil->Network->is_local({host => $target})) { # Local call - ($crm_mon_data, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + ($crm_mon_data, $return_code) = $anvil->System->call({debug => ($debug + 1), shell_call => $shell_call}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { crm_mon_data => $crm_mon_data, return_code => $return_code, @@ -2463,7 +2463,7 @@ sub parse_crm_mon foreach my $resource ($dom->findnodes('/pacemaker-result/resources/resource')) { next if $resource->{resource_agent} ne "ocf::alteeve:server"; - my $id = $resource->{id}; + my $id = $resource->{id}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { id => $id }}); foreach my $variable (sort {$a cmp $b} keys %{$resource}) { diff --git a/Anvil/Tools/DRBD.pm b/Anvil/Tools/DRBD.pm index 097631b4..e9aa076d 100644 --- a/Anvil/Tools/DRBD.pm +++ b/Anvil/Tools/DRBD.pm @@ -278,9 +278,9 @@ sub delete_resource $anvil->DRBD->gather_data({debug => $debug}); if (not exists $anvil->data->{new}{resource}{$resource}) { - # Resource not found. + # Resource not found, so it appears to already be gone. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0228", variables => { resource => $resource }}); - return('!!error!!'); + return(0); } my $waiting = 1; @@ -1100,23 +1100,25 @@ sub get_devices =head2 get_next_resource -This returns the next free DRBD minor number and the next free TCP port. The minor number is the first one found to be free. The TCP port is allocated in steps of three. That is to say, if the last used TCP port is '7790', then '7793' is considered the next free port. 
This is to ensure that if a DR host is added or used, the three adjacent ports are available for use in one resource configuration. +This returns the next free DRBD minor number and the next free TCP port. The minor number and TCP port returned are ones found to be free on both/all machines in the Anvil! system. As such, the returned values may skip values free on any given system. -Minor numbers are not grouped as resources and volumes can be referenced by name, so the DRBD minor number is less important for human users. +If a resource name is given, then, depending on C<< force_unique >>, this either returns an error if the name matches (useful for name conflict checks) or returns the first (lowest) minor number and TCP port used by the resource. my ($free_minor, $free_port) = $anvil->DRBD->get_next_resource({anvil_uuid => "a5ae5242-e9d3-46c9-9ce8-306855aa56db"}) If there is a problem, two empty strings will be returned. +B<< Note >>: Deleted resources, volumes and peers are ignored! As such, a minor or TCP port that used to be used by a deleted resource can be returned. + Parameters; -=head3 anvil_uuid (required) +=head3 anvil_uuid (optional, default 'Cluster->get_anvil_uuid') -This is the Anvil! in which we're looking for the next free resources. +This is the Anvil! in which we're looking for the next free resources. It's required, but generally it doesn't need to be specified as we can find it via C<< Cluster->get_anvil_uuid() >>. =head3 resource_name (optional) -If this is set, and the resource is found to already exist, the first DRBD minor number and first used TCP port are returned. Alternatively, if C<< force_unique >> is set to C<< 1 >>, and the resource is found to exist, C<< !!error!! >> is returned. +If this is set, and the resource is found to already exist, the first DRBD minor number and first used TCP port are returned. Alternatively, if C<< force_unique >> is set to C<< 1 >>, and the resource is found to exist, empty strings are returned. 
=head3 force_unique (optional, default '0') @@ -1131,8 +1133,6 @@ sub get_next_resource my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "DRBD->get_next_resource()" }}); - my $free_minor = ""; - my $free_port = ""; my $anvil_uuid = defined $parameter->{anvil_uuid} ? $parameter->{anvil_uuid} : ""; my $resource_name = defined $parameter->{resource_name} ? $parameter->{resource_name} : ""; my $force_unique = defined $parameter->{force_unique} ? $parameter->{force_unique} : 0; @@ -1142,21 +1142,30 @@ sub get_next_resource force_unique => $force_unique, }}); + # If we weren't passed an anvil_uuid, see if we can find one locally + if (not $anvil_uuid) + { + $anvil_uuid = $anvil->Cluster->get_anvil_uuid({debug => $debug}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { anvil_uuid => $anvil_uuid }}); + } + if (not $anvil_uuid) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "DRBD->get_next_resource()", parameter => "anvil_uuid" }}); - return($free_minor, $free_port); + return("", ""); } $anvil->Database->get_anvils({debug => $debug}); if (not exists $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}) { $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "error_0162", variables => { anvil_uuid => $anvil_uuid }}); - return($free_minor, $free_port); + return("", ""); } # Read in the resource information from both nodes. They _should_ be identical, but that's not 100% # certain. 
+ my $free_minor = ""; + my $free_port = ""; my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; @@ -1166,26 +1175,53 @@ sub get_next_resource dr1_host_uuid => $dr1_host_uuid, }}); - my $query = " +my $query = " SELECT - scan_drbd_resource_host_uuid, - scan_drbd_resource_name, - scan_drbd_resource_xml + a.host_uuid, + a.host_name, + b.scan_drbd_resource_name, + c.scan_drbd_volume_number, + c.scan_drbd_volume_device_path, + c.scan_drbd_volume_device_minor, + d.scan_drbd_peer_host_name, + d.scan_drbd_peer_ip_address, + d.scan_drbd_peer_protocol, + d.scan_drbd_peer_fencing, + d.scan_drbd_peer_tcp_port FROM - scan_drbd_resources + hosts a, + scan_drbd_resources b, + scan_drbd_volumes c, + scan_drbd_peers d WHERE - scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node1_host_uuid)." -OR - scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node2_host_uuid)." "; + a.host_uuid = b.scan_drbd_resource_host_uuid +AND + b.scan_drbd_resource_uuid = c.scan_drbd_volume_scan_drbd_resource_uuid +AND + c.scan_drbd_volume_uuid = d.scan_drbd_peer_scan_drbd_volume_uuid +AND + b.scan_drbd_resource_xml != 'DELETED' +AND + c.scan_drbd_volume_device_path != 'DELETED' +AND + d.scan_drbd_peer_connection_state != 'DELETED' +AND + ( + scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node1_host_uuid)." + OR + scan_drbd_resource_host_uuid = ".$anvil->Database->quote($node2_host_uuid)." "; if ($dr1_host_uuid) { $query .= " -OR - scan_drbd_resource_host_uuid = ".$anvil->Database->quote($dr1_host_uuid)." "; + OR + scan_drbd_resource_host_uuid = ".$anvil->Database->quote($dr1_host_uuid)." 
"; } $query .= " + ) ORDER BY - scan_drbd_resource_name ASC + b.scan_drbd_resource_name ASC, + c.scan_drbd_volume_device_minor ASC, + d.scan_drbd_peer_tcp_port ASC ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); @@ -1197,110 +1233,58 @@ ORDER BY }}); foreach my $row (@{$results}) { - my $scan_drbd_resource_host_uuid = $row->[0]; - my $scan_drbd_resource_name = $row->[1]; - my $scan_drbd_resource_xml = $row->[2]; + # I don't really need most of this, but it helps with debugging + my $host_uuid = $row->[0]; + my $host_name = $row->[1]; + my $scan_drbd_resource_name = $row->[2]; + my $scan_drbd_volume_number = $row->[3]; + my $scan_drbd_volume_device_path = $row->[4]; + my $scan_drbd_volume_device_minor = $row->[5]; + my $scan_drbd_peer_host_name = $row->[6]; + my $scan_drbd_peer_ip_address = $row->[7]; + my $scan_drbd_peer_protocol = $row->[8]; + my $scan_drbd_peer_fencing = $row->[9]; + my $scan_drbd_peer_tcp_port = $row->[10]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - scan_drbd_resource_host_uuid => $scan_drbd_resource_host_uuid, - scan_drbd_resource_name => $scan_drbd_resource_name, - scan_drbd_resource_xml => $scan_drbd_resource_xml, + 's1:host_uuid' => $host_uuid, + 's2:host_name' => $host_name, + 's3:scan_drbd_resource_name' => $scan_drbd_resource_name, + 's4:scan_drbd_volume_number' => $scan_drbd_volume_number, + 's5:scan_drbd_volume_device_path' => $scan_drbd_volume_device_path, + 's6:scan_drbd_volume_device_minor' => $scan_drbd_volume_device_minor, + 's7:scan_drbd_peer_host_name' => $scan_drbd_peer_host_name, + 's8:scan_drbd_peer_ip_address' => $scan_drbd_peer_ip_address, + 's9:scan_drbd_peer_protocol' => $scan_drbd_peer_protocol, + 's10:scan_drbd_peer_fencing' => $scan_drbd_peer_fencing, + 's11:scan_drbd_peer_tcp_port' => $scan_drbd_peer_tcp_port, }}); - next if $scan_drbd_resource_xml eq "DELETED"; - - local $@; - my $dom = eval { 
XML::LibXML->load_xml(string => $scan_drbd_resource_xml); }; - if ($@) - { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "error_0111", variables => { - xml_body => $scan_drbd_resource_xml, - eval_error => $@, - }}); - next; - } - - # Successful parse! - my $local_minor = ""; - my $local_port = ""; - foreach my $name ($dom->findnodes('/resource')) - { - my $resource = $name->{name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { resource => $resource }}); - - foreach my $host ($name->findnodes('./host')) - { - my $host_name = $host->{name}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_name => $host_name }}); - - foreach my $volume_vnr ($host->findnodes('./volume')) - { - my $volume = $volume_vnr->{vnr}; - my $minor = $volume_vnr->findvalue('./device/@minor'); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - 's1:host_name' => $host_name." 
\@ ".$resource."/".$volume, - 's2:minor' => $minor, - }}); - - $anvil->data->{drbd}{used_resources}{minor}{$minor}{used} = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "drbd::used_resources::minor::${minor}::used" => $anvil->data->{drbd}{used_resources}{minor}{$minor}{used}, - }}); - - if (not $local_minor) - { - $local_minor = $minor; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { local_minor => $local_minor }}); - } - } - } - - foreach my $connection ($name->findnodes('./connection')) - { - foreach my $host ($connection->findnodes('./host')) - { - my $host_name = $host->{name}; - my $tcp_port = $host->findvalue('./address/@port'); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - host_name => $host_name, - tcp_port => $tcp_port, - }}); - - $anvil->data->{drbd}{used_resources}{tcp_port}{$tcp_port}{used} = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - "drbd::used_resources::tcp_port::${tcp_port}::used" => $anvil->data->{drbd}{used_resources}{tcp_port}{$tcp_port}{used}, - }}); - - if (not $local_port) - { - $local_port = $tcp_port; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { local_port => $local_port }}); - } - } - } - } + $anvil->data->{drbd}{used_resources}{minor}{$scan_drbd_volume_device_minor}{used} = 1; + $anvil->data->{drbd}{used_resources}{tcp_port}{$scan_drbd_peer_tcp_port}{used} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "drbd::used_resources::minor::${scan_drbd_volume_device_minor}::used" => $anvil->data->{drbd}{used_resources}{minor}{$scan_drbd_volume_device_minor}{used}, + "drbd::used_resources::tcp_port::${scan_drbd_peer_tcp_port}::used" => $anvil->data->{drbd}{used_resources}{tcp_port}{$scan_drbd_peer_tcp_port}{used}, + }}); - # Is the user looking for 
this resource? - if (($resource_name) && ($resource_name eq $scan_drbd_resource_name)) + if (($resource_name) && ($scan_drbd_resource_name eq $resource_name)) { - # If we're force_unique, error. + # Found the resource the user was asking for. if ($force_unique) { # Error out. - return('!!error!!'); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, priority => 'err', key => "error_0237", variables => { resource_name => $resource_name }}); + return("", ""); } else { - $free_minor = $local_minor; - $free_port = $local_port; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - free_minor => $free_minor, - free_port => $free_port, - }}); - return($free_minor, $free_port); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0592", variables => { resource_name => $resource_name }}); + return($scan_drbd_volume_device_minor, $scan_drbd_peer_tcp_port); } } } + # If I'm here, I need to find the next free TCP port. We'll look for the next minor number for this + # host. my $looking = 1; $free_minor = 0; while($looking) @@ -1323,7 +1307,7 @@ ORDER BY { if (exists $anvil->data->{drbd}{used_resources}{tcp_port}{$free_port}) { - $free_port += 3; + $free_port++; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { free_port => $free_port }}); } else diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index b7f4e940..7c3e851e 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -2510,6 +2510,10 @@ Parameters; This is the host UUID we're querying the name of. +=head3 include_deleted (optional, default '0') + +If set to C<< 1 >>, hosts that are deleted are included. If you use this, and a machine was replaced, then watch for multiple host UUIDs. + =head3 short (optional, default '0') If set to C<< 1 >>, the short host name is returned. When set to C<< 0 >>, the full host name is returned. 
@@ -2523,12 +2527,14 @@ sub get_host_from_uuid my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->get_host_from_uuid()" }}); - my $host_name = ""; - my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : ""; - my $short = defined $parameter->{short} ? $parameter->{short} : 0; + my $host_name = ""; + my $host_uuid = defined $parameter->{host_uuid} ? $parameter->{host_uuid} : ""; + my $include_deleted = defined $parameter->{include_deleted} ? $parameter->{include_deleted} : 0; + my $short = defined $parameter->{short} ? $parameter->{short} : 0; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - host_uuid => $host_uuid, - short => $short, + host_uuid => $host_uuid, + include_deleted => $include_deleted, + short => $short, }}); if (not $host_uuid) @@ -2538,7 +2544,21 @@ sub get_host_from_uuid return($host_name); } - my $query = "SELECT host_name FROM hosts WHERE host_uuid = ".$anvil->Database->quote($host_uuid).";"; + my $query = " +SELECT + host_name +FROM + hosts +WHERE + host_uuid = ".$anvil->Database->quote($host_uuid); + if (not $include_deleted) + { + $query .= " +AND + host_key != 'DELETED'"; + } + $query .= " +;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); @@ -2592,7 +2612,13 @@ And to simplify look-ups by UUID or name; To prevent some cases of recursion, C<< hosts::loaded >> is set on successful load, and if this is set, this method immediately returns with C<< 0 >>. -This method takes no parameters. +Parameters; + +=head3 include_deleted (optional, default '0') + +By default, hosts that have been deleted (C<< host_key >> set to C<< DELETED >>) are not returned. 
If this is set to C<< 1 >>, those deleted hosts are included. + +B<< Note >>: Be careful when using this. If a machine was replaced, then there could be two (or more) host UUIDs for a given host name. =cut sub get_hosts @@ -2603,6 +2629,11 @@ sub get_hosts my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Database->get_hosts()" }}); + my $include_deleted = defined $parameter->{include_deleted} ? $parameter->{include_deleted} : 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + include_deleted => $include_deleted, + }}); + # Delete any data from past scans. delete $anvil->data->{hosts}{host_uuid}; delete $anvil->data->{sys}{hosts}{by_uuid}; @@ -2620,7 +2651,14 @@ SELECT host_ipmi, modified_date FROM - hosts + hosts "; + if (not $include_deleted) + { + $query .= " +WHERE + host_key != 'DELETED'"; + } + $query .= " ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); diff --git a/Anvil/Tools/Job.pm b/Anvil/Tools/Job.pm index 56dcfc00..3c1bfa62 100644 --- a/Anvil/Tools/Job.pm +++ b/Anvil/Tools/Job.pm @@ -356,9 +356,10 @@ FROM WHERE job_command LIKE ".$anvil->Database->quote("%".$program."%")." AND - job_progress != '100' + job_progress = 0 AND job_host_uuid = ".$anvil->Database->quote($host_uuid)." +LIMIT 1 ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); diff --git a/share/words.xml b/share/words.xml index d3674532..0835efa6 100644 --- a/share/words.xml +++ b/share/words.xml @@ -310,7 +310,7 @@ Output (if any): Unable to delete the server resource: [#!variable!server_name!#] as this node is not (yet) a full member of the cluster. 
It looks like to removal of the server resource: [#!variable!server_name!#] failed. The return code should have been '0', but: [#!variable!return_code!#] was returned. The 'pcs' command output, if any, was: [#!variable!output!#]. It looks like to removal of the server resource: [#!variable!server_name!#] failed. Unsafe to proceed with the removal of the server. Please check the logs for more information. - Unable to delete the resource: [#!variable!resource!#] because it wasn't found in DRBD's config. + Unable to delete the resource: [#!variable!resource!#] because it wasn't found in DRBD's config. This can happen if a previous delete partially completed, in which case this is not a problem. One or more peers need us, and we're not allowed to wait. Deletion aborted. The shell call: [#!variable!shell_call!#] was expected to return '0', but instead the return code: [#!variable!return_code!#] was received. The output, if any, was: [#!variable!output!#]. This host is not an Anvil! node or DR host, unable to migrate servers. @@ -319,6 +319,8 @@ Output (if any): Unable to find the target host to migrate to the job UUID: [#!variable!job_uuid!#]. The migration target host: [#!variable!target_host_uuid!#] is either invalid, or doesn't match one of the nodes in this Anvil! system. There appears to be no resource data in the database for the host: [#!variable!host_name!#]. Has ScanCore run and, specifically, has 'scan-hardware' run yet? Unable to provide available resources for this Anvil! system. + The resource name: [#!variable!resource_name!#] already exists, and 'force_unique' is set. This is likely a name conflict, returning empty values. + This node is not yet fully in the cluster. Sleeping for a bit, then we'll exit. The job will try again shortly after. @@ -657,6 +659,9 @@ It should be provisioned in the next minute or two. Manually calling 'scan-drbd' to ensure that the new agent is recorded. 
The server name: [#!variable!server_name!#] is already used by another server. Deleting the server's definition file: [#!variable!file!#]... + The server: [#!variable!server_name!#] was not found in the cluster configuration. This can happen if a server was partially deleted and we're trying again. + Preparing to delete the server: [#!variable!server_name!#]. + Using virsh to destroy (force off) the server: [#!variable!server_name!#], if it is still running. Starting: [#!variable!program!#]. @@ -1342,6 +1347,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Deleting the file: [#!variable!file!#]. Wiping the metadata from the DRBD resource: [#!variable!resource!#]. Wiping any file system signatures and then deleting the logical volume: [#!variable!device_path!#]. + The resource name: [#!variable!resource_name!#] was found, returning the first TCP port and minor number. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-delete-server b/tools/anvil-delete-server index cf716ef5..0fee1e94 100755 --- a/tools/anvil-delete-server +++ b/tools/anvil-delete-server @@ -26,23 +26,21 @@ if (($running_directory =~ /^\./) && ($ENV{PWD})) $| = 1; my $anvil = Anvil::Tools->new(); -$anvil->Log->level({set => 2}); -$anvil->Log->secure({set => 1}); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); # Read switches (target ([user@]host[:port]) and the file with the target's password. If the password is # passed directly, it will be used. Otherwise, the password will be read from the database. 
$anvil->data->{switches}{'job-uuid'} = ""; $anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'} }}); $anvil->Database->connect(); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 3, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { # No databases, update the job, sleep for a bit and then exit. The daemon will pick it up and try # again after we exit. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0218"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, 'print' => 1, level => 0, priority => "err", key => "error_0218"}); sleep 10; $anvil->nice_exit({exit_code => 1}); } @@ -54,14 +52,14 @@ if (not $anvil->data->{sys}{database}{connections}) # and, if available, the DR host. At this point, the job acts the same regardless of the host. The DRBD # resource will stopped and then have it's metadata wiped, The LV backing the device will be deleted next. -$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid({debug => 2}); +$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid(); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'sys::anvil_uuid' => $anvil->data->{sys}{anvil_uuid} }}); # If we don't have a job UUID, try to find one. if (not $anvil->data->{switches}{'job-uuid'}) { # Load the job data. 
- $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({program => $THIS_FILE}); + $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({debug => 2, program => $THIS_FILE}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }}); } @@ -93,7 +91,7 @@ if ($anvil->data->{switches}{'job-uuid'}) } # Log an exit. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); $anvil->nice_exit({exit_code => 1}); } @@ -105,7 +103,7 @@ else if (not $anvil->data->{sys}{anvil_uuid}) { # We can't do anything, exit. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, 'print' => 1, level => 0, priority => 'err', key => "error_0217"}); $anvil->nice_exit({exit_code => 1}); } @@ -144,11 +142,17 @@ sub run_jobs remove_from_pacemaker($anvil); } + $anvil->Job->update_progress({ + progress => 25, + message => "job_0222,!!server_name!".$server_name."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0222", variables => { server_name => $server_name }}); + $anvil->Job->update_progress({ progress => 50, message => "job_0213", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0213"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0213"}); ### NOTE: If we're a DR host, and the server wasn't used here, this is expected to fail # Delete the DRBD resource and backing storage @@ -162,7 +166,7 @@ sub run_jobs message => "error_0228,!!resource!".$server_name."!!", 
job_status => "failed", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0228", variables => { resource => $server_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0228", variables => { resource => $server_name }}); $anvil->nice_exit({exit_code => 1}); } @@ -170,7 +174,7 @@ sub run_jobs progress => 60, message => "job_0214", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0214"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0214"}); # Make sure the server is flagged as DELETEd. $anvil->Database->get_servers(); @@ -184,7 +188,8 @@ SET server_state = 'DELETED', modified_date = ".$anvil->Database->quote($anvil->data->{sys}{database}{timestamp})." WHERE - server_uuid = ".$anvil->Database->quote($server_uuid).";"; + server_uuid = ".$anvil->Database->quote($server_uuid)." +;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { query => $query }}); $anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__}); @@ -192,7 +197,7 @@ WHERE progress => 70, message => "job_0215", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0215"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0215"}); } # Delete the XML definition file. 
@@ -205,7 +210,7 @@ WHERE progress => 80, message => "job_0220,!!file!".$resource_file."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0220", variables => { file => $resource_file }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0220", variables => { file => $resource_file }}); unlink $resource_file; } @@ -213,7 +218,7 @@ WHERE progress => 100, message => "job_0216", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0216"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0216"}); return(0); } @@ -235,46 +240,110 @@ sub remove_from_pacemaker progress => 10, message => "job_0210,!!server_name!".$server_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0210", variables => { server_name => $server_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0210", variables => { server_name => $server_name }}); - my $problem = $anvil->Cluster->shutdown_server({ - debug => 2, - server => $server_name, - 'wait' => 1, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - if ($problem) + if (not $anvil->data->{cib}{parsed}{data}{server}{$server_name}) { - # Failed to stop. + # Server is already out of the cluster. 
$anvil->Job->update_progress({ - progress => 100, - message => "error_0223,!!server_name!".$server_name."!!", - job_status => "failed", + progress => 20, + message => "job_0221,!!server_name!".$server_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); - $anvil->nice_exit({exit_code => 1}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0221", variables => { server_name => $server_name }}); + + # Force the server off now, just in case it's running outside the cluster + $anvil->Job->update_progress({ + progress => 25, + message => "job_0223,!!server_name!".$server_name."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0223", variables => { server_name => $server_name }}); + my $success = $anvil->Server->shutdown_virsh({ + debug => 2, + force => 1, + 'wait' => 1, + server => $server_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); + if (not $success) + { + # Failed to stop + $anvil->Job->update_progress({ + progress => 100, + message => "error_0223,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } } - - # Server is off now. - $anvil->Job->update_progress({ - progress => 20, - message => "job_0211,!!server_name!".$server_name."!!", - }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0211", variables => { server_name => $server_name }}); - - # Delete the resource. 
- $problem = $anvil->Cluster->delete_server({debug => 2, server_name => $server_name}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - if ($problem) + elsif ($anvil->data->{cib}{parsed}{data}{server}{$server_name}{status} ne "off") { - # Something went wrong + # As we're going to delete the server, we won't wait. We'll come back here and destroy the + # server if it's still running. + my $problem = $anvil->Cluster->shutdown_server({ + debug => 2, + server => $server_name, + 'wait' => 0, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Failed to stop. + $anvil->Job->update_progress({ + progress => 100, + message => "error_0223,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } + + # Force the server off now. 
$anvil->Job->update_progress({ - progress => 100, - message => "error_0227,!!server_name!".$server_name."!!", - job_status => "failed", + progress => 20, + message => "job_0223,!!server_name!".$server_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0227", variables => { server_name => $server_name }}); - $anvil->nice_exit({exit_code => 1}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0223", variables => { server_name => $server_name }}); + my $success = $anvil->Server->shutdown_virsh({ + debug => 2, + force => 1, + 'wait' => 1, + server => $server_name, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { success => $success }}); + if (not $success) + { + # Failed to stop + $anvil->Job->update_progress({ + progress => 100, + message => "error_0223,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0223", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } + + # Server is off now. + $anvil->Job->update_progress({ + progress => 25, + message => "job_0211,!!server_name!".$server_name."!!", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0211", variables => { server_name => $server_name }}); + + # Delete the resource. 
+ $problem = $anvil->Cluster->delete_server({debug => 2, server_name => $server_name}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Something went wrong + $anvil->Job->update_progress({ + progress => 100, + message => "error_0227,!!server_name!".$server_name."!!", + job_status => "failed", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0227", variables => { server_name => $server_name }}); + $anvil->nice_exit({exit_code => 1}); + } } # Register the job with the peers. @@ -315,7 +384,7 @@ sub remove_from_pacemaker progress => $progress, message => "job_0212,!!host_name!".$host_name."!!", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "job_0212", variables => { host_name => $host_name }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "job_0212", variables => { host_name => $host_name }}); $progress += 10; } @@ -352,7 +421,7 @@ sub parse_job_data message => "error_0219,!!job_uuid!".$anvil->data->{switches}{'job-uuid'}."!!", job_status => "failed", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0219", variables => { job_uuid => $anvil->data->{switches}{'job-uuid'} }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0219", variables => { job_uuid => $anvil->data->{switches}{'job-uuid'} }}); $anvil->nice_exit({exit_code => 1}); } @@ -367,7 +436,7 @@ sub parse_job_data message => "error_0220,!!server_uuid!".$server_uuid."!!", job_status => "failed", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0220", variables => { server_uuid => $server_uuid }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' 
=> 1, level => 0, priority => 'err', key => "error_0220", variables => { server_uuid => $server_uuid }}); $anvil->nice_exit({exit_code => 1}); } @@ -380,10 +449,21 @@ sub parse_job_data { # The cluster isn't running, sleep and exit. $anvil->Job->update_progress({ - progress => 0, + progress => 10, message => "error_0222", }); - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => 'err', key => "error_0222"}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0222"}); + sleep 10; + $anvil->nice_exit({exit_code => 1}); + } + elsif (not $anvil->data->{cib}{parsed}{'local'}{ready}) + { + # We're not a full member (yet) + $anvil->Job->update_progress({ + progress => 10, + message => "error_0238", + }); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => 'err', key => "error_0238"}); sleep 10; $anvil->nice_exit({exit_code => 1}); } diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 025907e5..1005bcf2 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -8,6 +8,9 @@ # 0 = Normal exit. # 1 = Any problem that causes an early exit. 
# +# TODO: Support cloning; Example +# - virt-clone --original-xml /mnt/shared/definitions/.xml --name --file --check path_exists=off +# use strict; use warnings; @@ -1067,28 +1070,19 @@ sub check_drbd_minor_and_port $anvil->nice_exit({exit_code => 1}); } - if ((($anvil->data->{job}{drbd_minor} eq "") or ($anvil->data->{job}{drbd_tcp_port} eq "")) && (not $anvil->data->{job}{peer_mode})) + if (not $anvil->data->{job}{peer_mode}) { - my ($free_minor, $free_port) = $anvil->DRBD->get_next_resource({ + # We're primary, so query the minor number and TCP port + # The peer must use the TCP and minor as set in the job + ($anvil->data->{job}{drbd_minor}, $anvil->data->{job}{drbd_tcp_port}) = $anvil->DRBD->get_next_resource({ debug => 2, anvil_uuid => $anvil->data->{job}{anvil_uuid}, resource_name => $anvil->data->{job}{server_name}, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - free_minor => $free_minor, - free_port => $free_port, + 'job::drbd_minor' => $anvil->data->{job}{drbd_minor}, + 'job::drbd_tcp_port' => $anvil->data->{job}{drbd_tcp_port}, }}); - - if ($anvil->data->{job}{drbd_minor} eq "") - { - $anvil->data->{job}{drbd_minor} = $free_minor; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'job::drbd_minor' => $anvil->data->{job}{drbd_minor} }}); - } - if ($anvil->data->{job}{drbd_tcp_port} eq "") - { - $anvil->data->{job}{drbd_tcp_port} = $free_port; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'job::drbd_tcp_port' => $anvil->data->{job}{drbd_tcp_port} }}); - } } # If we don't have a DRBD minor or TCP port, we're stuck. @@ -1614,10 +1608,8 @@ sub interactive_ask_server_name }}); if ($server_state eq "DELETED") { - print "- ".$server_name." ("..")\n"; - print $anvil->Words->string({key => "message_0220", variables => { - server_name => $server_name, - }})."\n"; + ### NOTE: This could get cluttered, so for now we'll not show them. 
+ #print $anvil->Words->string({key => "message_0220", variables => { server_name => $server_name }})."\n"; } else { diff --git a/tools/test.pl b/tools/test.pl index 02512052..98a94eb8 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -26,44 +26,7 @@ $anvil->Get->switches; $anvil->Database->connect({debug => 3}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); -my $key_string = 'scan_drbd_message_0007,!!resource_name!srv00-sql1!!,!!resource_state!#!string!scan_drbd_unit_0004!#!!,!!resource_xml!] from key string: [scan_drbd_message_0007,!!resource_name!srv00-sql1!!,!!resource_state!#!string!scan_drbd_unit_0004!#!!,!!resource_xml! - - - /dev/drbd_srv00-sql1_0 - /dev/mk-a02n01_ssd0/srv00-sql1_0 - internal - -
(null)
-
- - - /dev/drbd_srv00-sql1_0 - /dev/mk-a02n02_ssd0/srv00-sql1_0 - internal - -
(null)
-
- -
10.101.12.1
-
10.101.12.2
-
-
-
-
-
-
!!'; -my $out_string = $anvil->Words->parse_banged_string({ - debug => 2, - key_string => $key_string, -}); - -print "Got: -==== -".$out_string." -==== -"; +my ($minor, $tcp_port) = $anvil->DRBD->get_next_resource({debug => 2}); +print "Next free minor: [".$minor."], tcp port: [".$tcp_port."]\n"; $anvil->nice_exit({exit_code => 0}); From ff3681c9134da3ebbe1eb668ec3dd038717d1af2 Mon Sep 17 00:00:00 2001 From: Digimer Date: Wed, 3 Feb 2021 12:58:18 -0500 Subject: [PATCH 4/4] * Added support for manually setting the server's UUID in anvil-provision-server. Also, if a server name existed before but was deleted, the old UUID is re-used to provide better continuity. The user can override this behaviour with the new --uuid switch. Signed-off-by: Digimer --- tools/anvil-provision-server | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 1005bcf2..716ff031 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -40,6 +40,7 @@ $anvil->data->{switches}{os} = ""; $anvil->data->{switches}{cpu} = ""; $anvil->data->{switches}{'job-uuid'} = ""; $anvil->data->{switches}{name} = ""; +$anvil->data->{switches}{uuid} = ""; $anvil->data->{switches}{ram} = ""; $anvil->data->{switches}{'storage-group'} = ""; $anvil->data->{switches}{'storage-size'} = ""; @@ -50,6 +51,7 @@ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list 'switches::cpu' => $anvil->data->{switches}{cpu}, 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, 'switches::name' => $anvil->data->{switches}{name}, + 'switches::uuid' => $anvil->data->{switches}{uuid}, 'switches::ram' => $anvil->data->{switches}{ram}, 'switches::storage-group' => $anvil->data->{switches}{'storage-group'}, 'switches::storage-size' => $anvil->data->{switches}{'storage-size'}, @@ -409,6 +411,10 @@ sub provision_server $shell_call .= " --boot menu=on \\\n"; $shell_call .= " --disk 
path=/dev/drbd/by-res/".$server."/0,target.bus=virtio,driver.io=threads,cache=writeback,driver.discard=unmap,boot.order=1 \\\n"; $shell_call .= " --disk path=".$anvil->data->{job}{install_iso_path}.",device=cdrom,shareable=on,boot.order=2 \\\n"; + if ($anvil->data->{job}{server_uuid}) + { + $shell_call .= " --uuid=".$anvil->data->{job}{server_uuid}." \\\n"; + } if ($anvil->data->{job}{driver_iso_path}) { $shell_call .= " --disk path=".$anvil->data->{job}{driver_iso_path}.",device=cdrom,shareable=on,boot.order=3 --force \\\n"; @@ -1123,6 +1129,11 @@ sub parse_job_data $anvil->data->{job}{server_name} = $1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'job::server_name' => $anvil->data->{job}{server_name} }}); } + if ($line =~ /server_uuid=(.*)$/) + { + $anvil->data->{job}{server_uuid} = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 'job::server_uuid' => $anvil->data->{job}{server_uuid} }}); + } if ($line =~ /cpu_cores=(.*)$/) { $anvil->data->{job}{cpu_cores} = $1; @@ -1433,6 +1444,7 @@ sub interactive_question } $anvil->data->{new_server}{name} = $anvil->data->{switches}{name} ? $anvil->data->{switches}{name} : ""; + $anvil->data->{new_server}{uuid} = $anvil->data->{switches}{uuid} ? $anvil->data->{switches}{uuid} : ""; # If this is a node, load the anvil_uuid automatically. @@ -1647,10 +1659,13 @@ sub interactive_ask_server_name }}); if ($server_state eq "DELETED") { - # Valid, we can re-use deleted server names. + # Valid, we can re-use deleted server names. We'll also re-use the + # UUID, if the user didn't specifically specify a UUID. 
$anvil->data->{new_server}{name} = $answer; + $anvil->data->{new_server}{uuid} = $server_uuid if not $anvil->data->{new_server}{uuid}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "new_server::name" => $anvil->data->{new_server}{name}, + "new_server::uuid" => $anvil->data->{new_server}{uuid}, }}); } else @@ -2396,6 +2411,11 @@ storage_group_uuid=".$anvil->data->{new_server}{storage_group}." storage_size=".$anvil->data->{new_server}{storage_size}." install_iso=".$anvil->data->{new_server}{install_media}." driver_iso=".$anvil->data->{new_server}{driver_disc}; + if ($anvil->data->{new_server}{uuid}) + { + $job_data .= " +server_uuid=".$anvil->data->{new_server}{uuid}; + } print "\n".$anvil->Words->string({key => "job_0183", variables => { job_data => $job_data }})."\n"; # Register the job with the primary node on the Anvil! (or node 1, if neither node is primary).