From eec14cb013b25fcf24662f796ce8a473f3eb38d7 Mon Sep 17 00:00:00 2001 From: Digimer Date: Sun, 18 Apr 2021 19:54:58 -0400 Subject: [PATCH] * Finished tools/anvil-boot-server and tools/anvil-shutdown-server. * Fixed a bug where, in rare cases, $anvil->hostname() would call 'hostnamectl' and get a dbus error during shutdown, which would then cause the hostname to be changed to the error in the database. * Fixed a bug in Cluster->boot_server() where it would never verify that a server has started successfully. * Updated Database->get_ip_addresses() to store the IPs we manage in 'ip_addresses::::X'. * Updated ocf:alteeve:server to work from command line calls, though more testing is still needed. * Started work on 'anvil-rename-server', but haven't gotten far with it yet. Signed-off-by: Digimer --- Anvil/Tools.pm | 2 + Anvil/Tools/Cluster.pm | 26 +-- Anvil/Tools/Database.pm | 26 ++- Anvil/Tools/Get.pm | 12 ++ Anvil/Tools/Job.pm | 4 +- Anvil/Tools/System.pm | 26 ++- notes | 25 ++- ocf/alteeve/server | 96 ++++++----- share/words.xml | 27 ++- tools/Makefile.am | 1 + tools/anvil-boot-server | 308 +++++++++++++++++++++++++++++++++- tools/anvil-rename-server | 101 ++++++++++++ tools/anvil-safe-stop | 72 ++++++++ tools/anvil-shutdown-server | 321 ++++++++++++++++++++++++++++++++++++ 14 files changed, 983 insertions(+), 64 deletions(-) create mode 100755 tools/anvil-rename-server create mode 100755 tools/anvil-safe-stop diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index ddefb40d..9f2704df 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1027,6 +1027,7 @@ sub _set_paths 'dnf.conf' => "/etc/dnf/dnf.conf", 'firewalld.conf' => "/etc/firewalld/firewalld.conf", 'global-common.conf' => "/etc/drbd.d/global_common.conf", + hostname => "/etc/hostname", hosts => "/etc/hosts", 'httpd.conf' => "/etc/httpd/conf/httpd.conf", 'journald_anvil' => "/etc/systemd/journald.conf.d/anvil.conf", @@ -1087,6 +1088,7 @@ sub _set_paths units => "/usr/lib/systemd/system", }, exe => { + ocf_alteeve => 
"/usr/lib/ocf/resource.d/alteeve/server", 'anvil-change-password' => "/usr/sbin/anvil-change-password", 'anvil-check-memory' => "/usr/sbin/anvil-check-memory", 'anvil-configure-host' => "/usr/sbin/anvil-configure-host", diff --git a/Anvil/Tools/Cluster.pm b/Anvil/Tools/Cluster.pm index d676f804..48621a01 100644 --- a/Anvil/Tools/Cluster.pm +++ b/Anvil/Tools/Cluster.pm @@ -615,7 +615,7 @@ sub boot_server # Is the server already running? If so, do nothing. my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; - my $host = $anvil->data->{cib}{parsed}{data}{server}{$server}{host}; + my $host = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status, host => $host, @@ -667,19 +667,22 @@ sub boot_server while($waiting) { $anvil->Cluster->parse_cib({debug => $debug}); - my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; - my $host = $anvil->data->{cib}{parsed}{data}{server}{$server}{host}; + my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; + my $host_name = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - status => $status, - host => $host, + status => $status, + host_name => $host_name, }}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0552", variables => { server => $server }}); - if ($host eq "running") + if ($status eq "running") { # It's up. $waiting = 0; - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0553", variables => { server => $server }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0553", variables => { + server => $server, + host_name => $host_name, + }}); } else { @@ -798,7 +801,7 @@ sub delete_server # Is the server running? If so, stop it first. 
my $status = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{status}; - my $host = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{host}; + my $host = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{host_name}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status, host => $host, @@ -895,6 +898,7 @@ sub get_anvil_name return($anvil_name); } + =head2 get_anvil_uuid This returns the C<< anvils >> -> C<< anvil_uuid >> that a host belongs to. If the host is not found in any Anvil!, an empty string is returned. @@ -1611,7 +1615,7 @@ sub migrate_server { $anvil->Cluster->parse_cib({debug => $debug}); my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; - my $host = $anvil->data->{cib}{parsed}{data}{server}{$server}{host}; + my $host = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name}; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status, host => $host, @@ -2714,7 +2718,7 @@ sub shutdown_server # Is the server already stopped? If so, do nothing. my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; - my $host = defined $anvil->data->{cib}{parsed}{data}{server}{$server}{host} ? $anvil->data->{cib}{parsed}{data}{server}{$server}{host} : ""; + my $host = defined $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name} ? $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status, host => $host, @@ -2756,7 +2760,7 @@ sub shutdown_server { $anvil->Cluster->parse_cib({debug => $debug}); my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; - my $host = defined $anvil->data->{cib}{parsed}{data}{server}{$server}{host} ? 
$anvil->data->{cib}{parsed}{data}{server}{$server}{host} : ""; + my $host = defined $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name} ? $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { status => $status, host => $host, diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index b1c25b66..4c180f3e 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -2997,6 +2997,12 @@ sub get_ip_addresses # Make sure we've loaded host data. $anvil->Database->get_hosts({debug => $debug}); + # Purge any previously known data. + if (exists $anvil->data->{ip_addresses}) + { + delete $anvil->data->{ip_addresses}; + } + foreach my $host_uuid (keys %{$anvil->data->{hosts}{host_uuid}}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { host_uuid => $host_uuid }}); @@ -3116,6 +3122,7 @@ AND $query = " SELECT ip_address_uuid, + ip_address_host_uuid, ip_address_on_type, ip_address_on_uuid, ip_address_address, @@ -3138,11 +3145,14 @@ AND foreach my $row (@{$results}) { my $ip_address_uuid = $row->[0]; - my $ip_address_on_type = $row->[1]; - my $ip_address_on_uuid = $row->[2]; - my $ip_address_address = $row->[3]; - my $ip_address_subnet_mask = $row->[4]; + my $ip_address_host_uuid = $row->[1]; + my $ip_address_on_type = $row->[2]; + my $ip_address_on_uuid = $row->[3]; + my $ip_address_address = $row->[4]; + my $ip_address_subnet_mask = $row->[5]; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + ip_address_uuid => $ip_address_uuid, + ip_address_host_uuid => $ip_address_host_uuid, ip_address_on_type => $ip_address_on_type, ip_address_on_uuid => $ip_address_on_uuid, ip_address_address => $ip_address_address, @@ -3196,6 +3206,14 @@ AND "hosts::host_uuid::${host_uuid}::ip_address::${ip_address_address}::on_interface" => 
$anvil->data->{hosts}{host_uuid}{$host_uuid}{ip_address}{$ip_address_address}{on_interface}, "hosts::host_uuid::${host_uuid}::ip_address::${ip_address_address}::on_network" => $anvil->data->{hosts}{host_uuid}{$host_uuid}{ip_address}{$ip_address_address}{on_network}, }}); + + # We also want to be able to map IPs to hosts. + $anvil->data->{ip_addresses}{$ip_address_address}{host_uuid} = $ip_address_host_uuid; + $anvil->data->{ip_addresses}{$ip_address_address}{ip_address_uuid} = $ip_address_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "ip_addresses::${ip_address_address}::host_uuid" => $anvil->data->{ip_addresses}{$ip_address_address}{host_uuid}, + "ip_addresses::${ip_address_address}::ip_address_uuid" => $anvil->data->{ip_addresses}{$ip_address_address}{ip_address_uuid}, + }}); } } diff --git a/Anvil/Tools/Get.pm b/Anvil/Tools/Get.pm index 34c144db..7c9c3608 100644 --- a/Anvil/Tools/Get.pm +++ b/Anvil/Tools/Get.pm @@ -1346,6 +1346,18 @@ sub host_name { # The environment variable isn't set. Call 'hostnamectl' on the command line. ($host_name, my $return_code) = $anvil->System->call({debug => 9999, shell_call => $anvil->data->{path}{exe}{hostnamectl}." --static"}); + if ($return_code) + { + # We can't trust the hostname. This could be an error like "Could not get property: + # Refusing activation, D-Bus is shutting down.". Try reading in the '/etc/hostname' + # file instead. + $host_name = $anvil->Storage->read_file({debug => 9999, file => $anvil->data->{path}{configs}{hostname}}); + if ($host_name eq "!!error!!") + { + # Failed to read the file, too. What the hell? Exit out. + print "Failed to query the hostname using 'hostnamectl --static' and failed to read the content of: [".$anvil->data->{path}{configs}{hostname}."]. 
Something is very wrong, exiting.\n"; + } + } } return($host_name); diff --git a/Anvil/Tools/Job.pm b/Anvil/Tools/Job.pm index 2ce7438a..eacc7728 100644 --- a/Anvil/Tools/Job.pm +++ b/Anvil/Tools/Job.pm @@ -678,7 +678,7 @@ WHERE } } - ### NOTE: This is used by 'anvil-update-system'. + ### NOTE: This is used by 'anvil-update-system'. It should be moved back over to it later. # Insert counts if ($job_status =~ /message_0058/gs) { @@ -704,7 +704,7 @@ WHERE # This is used by 'anvil-download-file' if ($job_status =~ /message_0142/gs) { - ### NOTE: Left off here. + ### NOTE: Is this needed anymore? # my $downloaded = $anvil->data->{counts}{downloaded} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{downloaded}}) : 0; # my $installed = $anvil->data->{counts}{installed} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{installed}}) : 0; # my $verified = $anvil->data->{counts}{verified} ? $anvil->Convert->add_commas({number => $anvil->data->{counts}{verified}}) : 0; diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index ef0e2931..d258054d 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -4639,6 +4639,9 @@ sub update_hosts my $trusted_host_uuids = $anvil->Get->trusted_hosts({debug => $debug}); $anvil->Database->get_ip_addresses({debug => $debug}); + # Load the IPs we manage. If we find any entries for these that we don't expect, we'll remove them. + $anvil->Database->get_ip_addresses({debug => $debug}); + foreach my $host_uuid (keys %{$anvil->data->{hosts}{host_uuid}}) { my $host_name = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_name}; @@ -4694,6 +4697,10 @@ sub update_hosts }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_body => $old_body }}); + # This will track the IPs we've seen. We'll only write these out once, and skip any further entries + # that may be found. 
+ my $written_ips = {}; + # Parse the existing foreach my $line (split/\n/, $old_body) { @@ -4735,7 +4742,7 @@ sub update_hosts }}); # Make sure the IP is valid. - my $is_ip = $anvil->Validate->ip({ip => $ip_address, debug => $debug}); + my $is_ip = $anvil->Validate->ip({ip => $ip_address, debug => 3}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_ip => $is_ip }}); if (not $is_ip) { @@ -4747,6 +4754,16 @@ sub update_hosts next; } + if (exists $written_ips->{$ip_address}) + { + # Skipping at least one line, rewrite the file. + $changes = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); + next; + } + $written_ips->{$ip_address} = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "written_ips->{".$ip_address."}" => $written_ips->{$ip_address} }}); + foreach my $name (split/\s+/, $names) { # Is this name one we manage? If so, has the IP changed? @@ -4855,7 +4872,8 @@ sub update_hosts { $changes = 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); - $new_body .= "\n# ".$anvil->Words->string({key => "message_0178", variables => { date => $anvil->Get->date_and_time({debug => $debug}) }})."\n"; + $new_body .= "\n"; + #$new_body .= "\n# ".$anvil->Words->string({key => "message_0178", variables => { date => $anvil->Get->date_and_time({debug => $debug}) }})."\n"; foreach my $ip_address (@{$ip_order}) { @@ -4864,6 +4882,10 @@ sub update_hosts } } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:changes' => $changes, + 's2:new_body' => $new_body, + }}); if ($changes) { # Write the new file. 
diff --git a/notes b/notes index 1d336b34..c6d4a50d 100644 --- a/notes +++ b/notes @@ -943,4 +943,27 @@ OS10(config)# interface range ethernet 1/1/15-1/1/24 OS10(conf-range-eth1/1/15-1/1/24)# switchport access vlan 300 OS10(conf-range-eth1/1/15-1/1/24)# exit - +==================================== +-=] Rename a resource (ex: srv09-few-tcpremote1 -> srv09-fea-tcpremote1) +1. pacemaker - +1.1 - Record current settings: + # pcs resource config srv09-few-tcpremote1 + Resource: srv09-few-tcpremote1 (class=ocf provider=alteeve type=server) + Attributes: name=srv09-few-tcpremote1 + Meta Attrs: allow-migrate=true target-role=Stopped + Operations: migrate_from interval=0s timeout=600 (srv09-few-tcpremote1-migrate_from-interval-0s) + migrate_to interval=0s timeout=86400 (srv09-few-tcpremote1-migrate_to-interval-0s) + monitor interval=60 (srv09-few-tcpremote1-monitor-interval-60) + notify interval=0s timeout=20 (srv09-few-tcpremote1-notify-interval-0s) + start interval=0s on-fail=block timeout=300 (srv09-few-tcpremote1-start-interval-0s) + stop interval=0s on-fail=block timeout=86400 (srv09-few-tcpremote1-stop-interval-0s) +1.2 - Delete the resource + + +==================================== + + +# Hosts added or updated by the Anvil! on: [2021/04/17 16:24:52]: +10.201.10.2 Could not get property: Refusing activation, D-Bus is shutting down Could not get property: Refusing activation, D-Bus is shutting down. Could not get property: Refusing activation, D-Bus is shutting down.bcn1 +192.168.122.12 Could not get property: Refusing activation, D-Bus is shutting down.ifn1 +10.101.10.2 Could not get property: Refusing activation, D-Bus is shutting down.sn1 diff --git a/ocf/alteeve/server b/ocf/alteeve/server index c95b1a60..2184ccab 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -108,11 +108,11 @@ if (not $anvil->data->{sys}{database}{connections}) ### Read or Set the environment variables # This is the name of the server we're managing. 
# Example values: -$anvil->data->{environment}{OCF_RESKEY_name} = defined $ENV{OCF_RESKEY_name} ? $ENV{OCF_RESKEY_name} : ""; # srv07-el6 +$anvil->data->{environment}{OCF_RESKEY_name} = defined $ENV{OCF_RESKEY_name} ? $ENV{OCF_RESKEY_name} : ""; # This is our node name -$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = defined $ENV{OCF_RESKEY_CRM_meta_on_node} ? $ENV{OCF_RESKEY_CRM_meta_on_node} : ""; # mk-a02n01.digimer.ca +$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = defined $ENV{OCF_RESKEY_CRM_meta_on_node} ? $ENV{OCF_RESKEY_CRM_meta_on_node} : ""; # This says "UUID", but it's the node ID. -$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node_uuid} = defined $ENV{OCF_RESKEY_CRM_meta_on_node_uuid} ? $ENV{OCF_RESKEY_CRM_meta_on_node_uuid} : ""; # 1 +$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node_uuid} = defined $ENV{OCF_RESKEY_CRM_meta_on_node_uuid} ? $ENV{OCF_RESKEY_CRM_meta_on_node_uuid} : ""; # Not used here, contains the pacemaker node ID # This is the timeout for the called action in millisecond. $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} = defined $ENV{OCF_RESKEY_CRM_meta_timeout} ? $ENV{OCF_RESKEY_CRM_meta_timeout} : ""; # 20000 # If this is set, we'll bump our log level as well. @@ -149,12 +149,16 @@ if ($anvil->data->{environment}{PCMK_debug}) # behaviour. $anvil->data->{environment}{OCF_RESKEY_CRM_meta_stop_drbd_resources} = 0; -# Get any command line switches. +# We're used by anvil-boot-server and anvil-shutdown-server. They don't set environment variables, but instead +# use switches. Pick those up, if passed. + +$anvil->data->{switches}{migrate_to} = ""; # Sets 'meta_migrate_target' +$anvil->data->{switches}{migrate_from} = ""; # Sets 'meta_migrate_source' When set without 'migrate_to', does a status check after migration +$anvil->data->{switches}{server} = ""; # Sets 'name'. 
$anvil->Get->switches({debug => 2}); $anvil->Log->level({set => 2}); $anvil->Log->secure({set => 1}); - if ($anvil->data->{switches}{stop_drbd_resources}) { $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = 1; @@ -163,44 +167,60 @@ if ($anvil->data->{switches}{stop_drbd_resources}) # Something for the logs $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, key => "log_0298"}); -### TEST: to be removed later -if ($anvil->data->{switches}{test1}) +=cut + +server --server +-=] Boot (on an-a01n01) +2021/04/17 18:41:30:ocf:alteeve:server:236; environment::OCF_RESKEY_CRM_meta_name: [start] +2021/04/17 18:41:30:ocf:alteeve:server:236; environment::OCF_RESKEY_CRM_meta_on_node: [an-a01n01] + +-=] Stop (on an-a01n01) +2021/04/17 18:33:50:ocf:alteeve:server:236; environment::OCF_RESKEY_CRM_meta_name: [stop] +2021/04/17 18:33:50:ocf:alteeve:server:236; environment::OCF_RESKEY_CRM_meta_on_node: [an-a01n01] + +-=] Migration - source (before - from an-a01n01) +2021/04/17 19:33:12:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_migrate_source: [an-a01n01] +2021/04/17 19:33:12:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_migrate_target: [an-a01n02] +2021/04/17 19:33:12:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_name: [migrate_to] +2021/04/17 19:33:12:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_on_node: [an-a01n01] + +-=] Migration - target (after - to an-a01n02) +2021/04/17 19:33:19:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_migrate_source: [an-a01n01] +2021/04/17 19:33:19:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_migrate_target: [an-a01n02] +2021/04/17 19:33:19:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_name: [migrate_from] +2021/04/17 19:33:19:ocf:alteeve:server:196; environment::OCF_RESKEY_CRM_meta_on_node: [an-a01n02] + +=cut + +foreach my $key (sort {$a cmp $b} keys %{$anvil->data->{environment}}) { - $anvil->data->{switches}{migrate_to} 
= "#!SET!#"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = "mk-a02n02"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = "mk-a02n01"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_name} = "migrate_to"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "mk-a02n01"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} = "600000"; - $anvil->data->{environment}{OCF_RESKEY_name} = "srv07-el6"; - print "Running test 1; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { + "environment::${key}" => $anvil->data->{environment}{$key}, + }}); } -if ($anvil->data->{switches}{test2}) + +# Set environment variables from switches, if otherwise not set. +if (($anvil->data->{switches}{migrate_to}) && (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target})) { - $anvil->data->{switches}{migrate_to} = "#!SET!#"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = "mk-a02n01"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = "mk-a02n02"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_name} = "migrate_to"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "mk-a02n01"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} = "600000"; - $anvil->data->{environment}{OCF_RESKEY_name} = "srv07-el6"; - print "Running test 2; Migrate: [".$anvil->data->{environment}{OCF_RESKEY_name}."] from: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}."] to: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}."]\n"; + $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = $anvil->data->{switches}{migrate_to}; + $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = 
$anvil->data->{switches}{migrate_to}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "environment::OCF_RESKEY_CRM_meta_migrate_target" => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target}, + }}); } -if ($anvil->data->{switches}{test3}) +if (($anvil->data->{switches}{migrate_from}) && (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source})) { - $anvil->data->{environment}{OCF_RESKEY_name} = $anvil->data->{switches}{server}; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "mk-a02n01"; - print "Running test 3; Boot or shutdown of: [".$anvil->data->{environment}{OCF_RESKEY_name}."].\n"; + $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = $anvil->data->{switches}{migrate_from}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "environment::OCF_RESKEY_CRM_meta_migrate_source" => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}, + }}); } -if ($anvil->data->{switches}{test4}) +if (($anvil->data->{switches}{server}) && (not $anvil->data->{environment}{OCF_RESKEY_name})) { - $anvil->data->{switches}{monitor} = "#!SET!#"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_name} = "monitor"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_fail} = "block"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_timeout} = "20000"; - $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = "mk-a02n01"; - $anvil->data->{environment}{OCF_RESKEY_name} = "srv07-el6"; - print "Status check of: [".$anvil->data->{environment}{OCF_RESKEY_name}."] on: [".$anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node}."].\n"; + $anvil->data->{environment}{OCF_RESKEY_name} = $anvil->data->{switches}{server}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "environment::OCF_RESKEY_name" => $anvil->data->{environment}{OCF_RESKEY_name}, + }}); } # This is for debugging. 
@@ -639,8 +659,8 @@ shut off - The domain is not running. Usually this indicates the domain has crashed - The domain has crashed, which is always a violent ending. Usually this state can only occur if the domain has been configured not to restart on crash. pmsuspended - The domain has been suspended by guest power management, e.g. entered into s3 state. -=cut +=cut # This boots the server if possible. sub start_server { @@ -958,7 +978,7 @@ sub stop_server my ($anvil) = @_; # Stopping the server is simply a question of "is the server running?" and, if so, stop it. Once - # stopped, we stop the DRBD resource on both nodes. + # stopped, and if enabled, we stop the DRBD resource on both nodes. my $server = $anvil->data->{environment}{OCF_RESKEY_name}; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0582", variables => { server => $server }}); diff --git a/share/words.xml b/share/words.xml index fd270be4..d9e6b5b1 100644 --- a/share/words.xml +++ b/share/words.xml @@ -363,6 +363,18 @@ The attempt to start the cluster appears to have failed. The return code '0' was #!variable!output!# ==== + ' or '--server-uuid .]]> + This host is not a node or DR, unable to boot servers. + The definition file: [#!variable!definition_file!#] doesn't exist, unable to boot the server. + This host is not in an Anvil! system, aborting. + The definition file: [#!variable!definition_file!#] exists, but the server: [#!variable!server!#] does not appear to be in the cluster. Unable to boot it. + The server: [#!variable!server!#] status is: [#!variable!status!#]. We can only boot servers that are off, not booting it. + ' or '--server-uuid .]]> + This host is not a node or DR, unable to shut down servers. + This feature isn't enabled on DR hosts yet. + The server: [#!variable!server!#] does not appear to be in the cluster. Unable to shut it down. + The server: [#!variable!server!#] failed to boot. The reason why should be in the logs. 
+ The server: [#!variable!server!#] failed to shut down. The reason why should be in the logs. @@ -757,6 +769,19 @@ It should be provisioned in the next minute or two. Failed to add/confirmed the filter in lvm.conf! This should be corrected later by 'scan-drbd' though. The cluster isn't up. Provisioning the server will hold until it is. Will check every 10 seconds. The cluster is up. + The cluster is not started yet, waiting. Will check again shortly. + The cluster is up, but waiting for this node to become ready. Will check again shortly. + The cluster is up and the node is ready. + The server: [#!variable!server!#] has booted! + Done! + Booting server(s)... + Shutting down server(s)... + The server: [#!variable!server!#] is already off, nothing to do. + The server: [#!variable!server!#] has shut down. + The server: [#!variable!server!#] has been asked to stop. You may need to verify that it is actually stopped (some OSes ignore power button events). + The server: [#!variable!server!#] has been asked to boot. It should come up soon. + The server: [#!variable!server!#] will now be booted... + The server: [#!variable!server!#] will now be asked to shut down. If the server doesn't stop, please log into it and make sure it reacted to the power button event. Shut it down manually, if needed. Starting: [#!variable!program!#]. @@ -1399,7 +1424,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Waiting for the server: [#!variable!server!#] to finish migrating to the node: [#!variable!requested_node!#]... The migration of the server: [#!variable!server!#] to the node: [#!variable!requested_node!#] is complete! Waiting for the server: [#!variable!server!#] to boot... - The server: [#!variable!server!#] has booted! + The server: [#!variable!server!#] has booted on: [#!variable!host_name!#]! Waiting for the server: [#!variable!server!#] to shut down... The server: [#!variable!server!#] is now off. 
The server: [#!variable!server!#] (#!variable!server_uuid!#) has a definition change: diff --git a/tools/Makefile.am b/tools/Makefile.am index d1baa2db..cbb93175 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -23,6 +23,7 @@ dist_sbin_SCRIPTS = \ anvil-migrate-server \ anvil-parse-fence-agents \ anvil-provision-server \ + anvil-rename-server \ anvil-safe-start \ anvil-scan-network \ anvil-shutdown-server \ diff --git a/tools/anvil-boot-server b/tools/anvil-boot-server index 9bca8f8b..1355210b 100755 --- a/tools/anvil-boot-server +++ b/tools/anvil-boot-server @@ -7,6 +7,9 @@ # 0 = Normal exit. # 1 = No database connection. # +# TODO: +# - Add support for boot ordering. +# use strict; use warnings; @@ -24,12 +27,19 @@ $| = 1; my $anvil = Anvil::Tools->new(); -$anvil->data->{switches}{'job-uuid'} = ""; +$anvil->data->{switches}{'job-uuid'} = ""; +$anvil->data->{switches}{'no-wait'} = ""; # When set, we'll not wait when we boot a single server +$anvil->data->{switches}{'server'} = ""; +$anvil->data->{switches}{'server-uuid'} = ""; +$anvil->data->{switches}{'wait'} = ""; # When set, we'll wait for each server we boot when using '--all' $anvil->Get->switches; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, + 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, + 'switches::no-wait' => $anvil->data->{switches}{'no-wait'}, + 'switches::server' => $anvil->data->{switches}{'server'}, + 'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'}, + 'switches::wait' => $anvil->data->{switches}{'wait'}, }}); $anvil->Database->connect(); @@ -43,5 +53,293 @@ if (not $anvil->data->{sys}{database}{connections}) $anvil->nice_exit({exit_code => 1}); } -# Pick up the job details 
-load_job_data($anvil); +# If we don't have a job UUID, try to find one. +if (not $anvil->data->{switches}{'job-uuid'}) +{ + # Load the job data. + $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({program => $THIS_FILE}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }}); +} + +if ($anvil->data->{switches}{'job-uuid'}) +{ + # Load the job data. + $anvil->Job->clear(); + $anvil->Job->get_job_details(); + $anvil->Job->update_progress({ + progress => 1, + job_picked_up_by => $$, + job_picked_up_at => time, + message => "job_0282", + }); + + # Pull out the job data. + foreach my $line (split/\n/, $anvil->data->{jobs}{job_data}) + { + if ($line =~ /server=(.*?)$/) + { + $anvil->data->{switches}{'server'} = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::server' => $anvil->data->{switches}{'server'}, + }}); + } + if ($line =~ /server-uuid=(.*?)$/) + { + $anvil->data->{switches}{'server-uuid'} = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'}, + }}); + } + } +} + +# Now check that we have a server. If it's a server_uuid, read the server name. +if ($anvil->data->{switches}{'server-uuid'}) +{ + # Convert the server_uuid to a server_name. 
+ my $query = "SELECT server_name FROM servers WHERE server_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'server-uuid'}).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + + my $server_name = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $server_name = "" if not defined $server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_name => $server_name }}); + + if ($server_name) + { + $anvil->data->{switches}{'server'} = $server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::server' => $anvil->data->{switches}{'server'}, + }}); + } +} + +# Do we have a server name? +if (not $anvil->data->{switches}{'server'}) +{ + # Unable to proceed. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0257"}); + $anvil->Job->update_progress({progress => 100, message => "error_0257"}); + $anvil->nice_exit({exit_code => 1}); +} + +# Are we a node or DR host? +$anvil->data->{sys}{host_type} = $anvil->Get->host_type(); +if (($anvil->data->{sys}{host_type} ne "node") && ($anvil->data->{sys}{host_type} ne "dr")) +{ + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0258"}); + $anvil->Job->update_progress({progress => 100, message => "error_0258"}); + $anvil->nice_exit({exit_code => 1}); +} + +### TODO: Add DR support. For now, this only works on Nodes in a cluster +if ($anvil->data->{sys}{host_type} eq "dr") +{ + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0265"}); + $anvil->Job->update_progress({progress => 100, message => "error_0265"}); + $anvil->nice_exit({exit_code => 1}); +} + +# Make sure that we're in an Anvil! system. 
+$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid(); +if (not $anvil->data->{sys}{anvil_uuid}) +{ + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"}); + $anvil->Job->update_progress({progress => 100, message => "error_0260"}); + $anvil->nice_exit({exit_code => 1}); +} + +# Wait for pacemaker to be up. +wait_for_pacemaker($anvil); + +# If 'server' is 'all', boot all servers. +if (lc($anvil->data->{switches}{'server'}) eq "all") +{ + boot_all_servers($anvil); +} +else +{ + my $wait = $anvil->data->{switches}{'no-wait'} ? 0 : 1; # A single server is waited on unless '--no-wait' is given. + boot_server($anvil, $anvil->data->{switches}{'server'}, $wait, 50); +} + +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"}); +$anvil->Job->update_progress({progress => 100, message => "job_0281"}); + +$anvil->nice_exit({exit_code => 0}); + + +############################################################################################################# +# Functions # +############################################################################################################# + +sub wait_for_pacemaker +{ + my ($anvil) = @_; + + # Loop until the CIB parses cleanly and this node reports itself ready; the server can't be booted via pcs before then. + my $waiting = 1; + while($waiting) + { + my $problem = $anvil->Cluster->parse_cib({debug => 2}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if (not $problem) + { + my $node_name = $anvil->data->{cib}{parsed}{'local'}{name}; + my $ready = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ready => $ready }}); + if ($ready) + { + # We're good. 
+ $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0279"}); + $anvil->Job->update_progress({progress => 15, message => "job_0279"}); + } + else + { + # Node isn't ready yet. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0278"}); + $anvil->Job->update_progress({progress => 10, message => "job_0278"}); + } + } + else + { + # Cluster hasn't started. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"}); + $anvil->Job->update_progress({progress => 5, message => "job_0277"}); + } + if ($waiting) + { + sleep 10; + } + } + + return(0); +} + +sub boot_server +{ + my ($anvil, $server, $wait, $progress) = @_; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server => $server, + 'wait' => $wait, + progress => $progress, + }}); + + # Verify that the server's XML file exists. + my $definition_file = $anvil->data->{path}{directories}{shared}{definitions}."/".$server.".xml"; + if (not -e $definition_file) + { + # No XML, no boot + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0259", variables => { definition_file => $definition_file }}); + $anvil->Job->update_progress({progress => 100, message => "error_0259,!!definition_file!".$definition_file."!!"}); + $anvil->nice_exit({exit_code => 1}); + } + + if (not exists $anvil->data->{cib}{parsed}{data}{server}{$server}) + { + # XML exists, but it's not in the cluster (per the CIB parsed in wait_for_pacemaker()). 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0261", variables => { + server => $server, + definition_file => $definition_file, + }}); + $anvil->Job->update_progress({progress => 100, message => "error_0261,!!definition_file!".$definition_file."!!,!!server!".$server."!!"}); + $anvil->nice_exit({exit_code => 1}); + } + + my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { status => $status }}); + if ($status ne "off") + { + # It's not off, can't boot it. + if ($status eq "running") + { + # Already running, so there is nothing to do. Report it and return without error. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0548", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "log_0548,!!server!".$server."!!"}); + return(0); + } + else # Some other state (neither 'off' nor 'running'); treated as an error. + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0262", variables => { + server => $server, + status => $status, + }}); + $anvil->Job->update_progress({progress => 100, message => "error_0262,!!status!".$status."!!,!!server!".$server."!!"}); + $anvil->nice_exit({exit_code => 1}); + } + } + + # Now boot. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0288", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0288,!!server!".$server."!!"}); + my $problem = $anvil->Cluster->boot_server({ + debug => 2, + server => $server, + 'wait' => $wait, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Failed, abort. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0267", variables => { server => $server }}); + $anvil->Job->update_progress({progress => 100, message => "error_0267,!!server!".$server."!!"}); + $anvil->nice_exit({exit_code => 1}); + } + else + { + if ($wait) + { + # Booted! + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0280", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0280,!!server!".$server."!!"}); + } + else + { + # Boot requested + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0287", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0287,!!server!".$server."!!"}); + } + } + + return(0); +} + +sub boot_all_servers # Boots every server whose CIB status is 'off'; always returns 0. +{ + my ($anvil) = @_; + + ### TODO: Manage the boot order here. + # Progress starts at 15 (where wait_for_pacemaker() leaves it) and can climb by at most 70, to 85. + my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}}; + my $increment = $server_count ? int(70 / $server_count) : 0; # With no servers in the CIB, a bare division would be a fatal divide-by-zero. + my $percent = 15; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_count => $server_count, + increment => $increment, + }}); + foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}}) + { + my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; + my $host_name = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name}; + my $role = $anvil->data->{cib}{parsed}{data}{server}{$server}{role}; + my $active = $anvil->data->{cib}{parsed}{data}{server}{$server}{active}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:server' => $server, + 's2:status' => $status, + 's3:host_name' => $host_name, + 's4:role' => $role, + 's5:active' => $active, + }}); + + if ($status eq "off") + { + # Boot it. 
+ my $wait = $anvil->data->{switches}{'wait'} ? 1 : 0; # Unlike a single-server boot, only wait when '--wait' is given. + $percent += $increment; + boot_server($anvil, $server, $wait, $percent); + } + } + + return(0); +} diff --git a/tools/anvil-rename-server b/tools/anvil-rename-server new file mode 100755 index 00000000..326bba3e --- /dev/null +++ b/tools/anvil-rename-server @@ -0,0 +1,101 @@ +#!/usr/bin/perl +# +# This renames a server (and the DRBD resources and LVs below it). Given the nature of this program, it runs +# on the node directly, and SSH's into the peer(s) to update the DRBD config files and rename LVs. Normally, +# this should run on Node 1. +# +# Exit codes; +# 0 = Normal exit. +# 1 = Any problem that causes an early exit. +# +# TODO: +# + +use strict; +use warnings; +use Anvil::Tools; +require POSIX; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. +$| = 1; + +my $anvil = Anvil::Tools->new(); + +# Read switches; '--old-name' and '--new-name' describe the rename and '--job-uuid' is set when we're run as +# a job. Without a job UUID (given or found), the program falls through to interactive mode. 
+$anvil->data->{switches}{'job-uuid'} = ""; +$anvil->data->{switches}{'new-name'} = ""; +$anvil->data->{switches}{'old-name'} = ""; +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::new-name' => $anvil->data->{switches}{'new-name'}, + 'switches::old-name' => $anvil->data->{switches}{'old-name'}, + 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, +}}); + +$anvil->Database->connect(); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +if (not $anvil->data->{sys}{database}{connections}) +{ + # No databases, log the error, sleep for a bit and then exit. The daemon will pick it up and try + # again after we exit. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0077"}); + sleep 10; + $anvil->nice_exit({exit_code => 1}); +} + +# If we don't have a job UUID, try to find one. +if (not $anvil->data->{switches}{'job-uuid'}) +{ + # Look up a job UUID for this program. + $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({program => $THIS_FILE}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }}); +} + +# If we have a job UUID, run as a job. Otherwise, go into interactive mode. +if ($anvil->data->{switches}{'job-uuid'}) +{ + # Load the job data. + $anvil->Job->clear(); + $anvil->Job->get_job_details(); + $anvil->Job->update_progress({ + progress => 1, + job_picked_up_by => $$, + job_picked_up_at => time, + message => "message_0190", + }); + + # Job data will be in $anvil->data->{jobs}{job_data} + run_jobs($anvil); +} +else +{ + # Interactive! 
+ interactive_question($anvil); # NOTE(review): no 'interactive_question' sub is defined in this script as committed, so this call dies at runtime until one is added. +} + + +$anvil->nice_exit({exit_code => 0}); + + +############################################################################################################# +# Functions # +############################################################################################################# + +# Placeholder for the rename logic; it currently does nothing and returns 0. +sub run_jobs +{ + my ($anvil) = @_; + + + + return(0); +} diff --git a/tools/anvil-safe-stop b/tools/anvil-safe-stop new file mode 100755 index 00000000..750431f0 --- /dev/null +++ b/tools/anvil-safe-stop @@ -0,0 +1,72 @@ +#!/usr/bin/perl +# +# This does shutdown-time tasks; migrate or stop servers, withdraw and power off the host. +# +# Exit codes; +# 0 = Normal exit. +# 1 = Any problem that causes an early exit. +# +# TODO: +# + +use strict; +use warnings; +use Anvil::Tools; +require POSIX; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. +$| = 1; + +my $anvil = Anvil::Tools->new(); +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); + +# Make sure we're running as 'root' +# $< == real UID, $> == effective UID +if (($< != 0) && ($> != 0)) +{ + # Not root + print $anvil->Words->string({key => "error_0005"})."\n"; + $anvil->nice_exit({exit_code => 1}); +} + +# Connect to the database(s). If we have no connections, we don't proceed; the loop below retries until at +# least one database is available. 
+$anvil->Database->connect(); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); + +# If I have no databases, sleep until I do +if (not $anvil->data->{sys}{database}{connections}) +{ + # Log that no database is available, then retry the connection every ten seconds until we + # have one. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0075"}); + + until($anvil->data->{sys}{database}{connections}) + { + sleep 10; + + $anvil->refresh(); + $anvil->Database->connect(); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); + if (not $anvil->data->{sys}{database}{connections}) + { + # Keep waiting + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 3, secure => 0, key => "log_0439"}); + } + } +} + + +$anvil->nice_exit({exit_code => 0}); + +############################################################################################################# +# Functions # +############################################################################################################# diff --git a/tools/anvil-shutdown-server b/tools/anvil-shutdown-server index e69de29b..7f03dc6b 100755 --- a/tools/anvil-shutdown-server +++ b/tools/anvil-shutdown-server @@ -0,0 +1,321 @@ +#!/usr/bin/perl +# +# This program shuts down a server (or servers). It can be called as either a job from the webui or directly +# from another program or a terminal. +# +# Exit codes; +# 0 = Normal exit. +# 1 = Any problem that causes an early exit. 
+# +# TODO: +# - We need to support shutdown ordering (inverse of boot ordering) +# + +use strict; +use warnings; +use Anvil::Tools; + +my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0]; +my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0]; +if (($running_directory =~ /^\./) && ($ENV{PWD})) +{ + $running_directory =~ s/^\./$ENV{PWD}/; +} + +# Turn off buffering so that the pinwheel will display while waiting for the SSH call(s) to complete. +$| = 1; + +my $anvil = Anvil::Tools->new(); + +$anvil->data->{switches}{'job-uuid'} = ""; +$anvil->data->{switches}{'no-wait'} = ""; # When set, we'll not wait when we shut down a single server +$anvil->data->{switches}{'server'} = ""; +$anvil->data->{switches}{'server-uuid'} = ""; +$anvil->data->{switches}{'wait'} = ""; # When set, we'll wait for each server to shut down when using '--server all' +$anvil->Get->switches; +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); +$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, + 'switches::no-wait' => $anvil->data->{switches}{'no-wait'}, + 'switches::server' => $anvil->data->{switches}{'server'}, + 'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'}, + 'switches::wait' => $anvil->data->{switches}{'wait'}, +}}); + +$anvil->Database->connect(); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +if (not $anvil->data->{sys}{database}{connections}) +{ + # No databases, log the error, sleep for a bit and then exit. The daemon will pick it up and try + # again after we exit. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0077"}); + sleep 10; + $anvil->nice_exit({exit_code => 1}); +} + +# If we don't have a job UUID, try to find one. 
+if (not $anvil->data->{switches}{'job-uuid'}) +{ + # Look up a job UUID for this program. + $anvil->data->{switches}{'job-uuid'} = $anvil->Job->get_job_uuid({program => $THIS_FILE}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::job-uuid" => $anvil->data->{switches}{'job-uuid'} }}); +} + +if ($anvil->data->{switches}{'job-uuid'}) +{ + # Load the job data. + $anvil->Job->clear(); + $anvil->Job->get_job_details(); + $anvil->Job->update_progress({ + progress => 1, + job_picked_up_by => $$, + job_picked_up_at => time, + message => "job_0283", + }); + + # Pull the server name and/or server UUID out of the job data, one 'key=value' per line. + foreach my $line (split/\n/, $anvil->data->{jobs}{job_data}) + { + if ($line =~ /server=(.*?)$/) + { + $anvil->data->{switches}{'server'} = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::server' => $anvil->data->{switches}{'server'}, + }}); + } + if ($line =~ /server-uuid=(.*?)$/) + { + $anvil->data->{switches}{'server-uuid'} = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'}, + }}); + } + } +} + +# If we were given a server UUID, read the matching server name. +if ($anvil->data->{switches}{'server-uuid'}) +{ + # Convert the server_uuid to a server_name. 
+ my $query = "SELECT server_name FROM servers WHERE server_uuid = ".$anvil->Database->quote($anvil->data->{switches}{'server-uuid'}).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + + my $server_name = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $server_name = "" if not defined $server_name; # No matching row leaves this undefined. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_name => $server_name }}); + + if ($server_name) + { + $anvil->data->{switches}{'server'} = $server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::server' => $anvil->data->{switches}{'server'}, + }}); + } +} + +# Do we have a server name (from a switch, the job data or the UUID lookup)? +if (not $anvil->data->{switches}{'server'}) +{ + # Unable to proceed. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0263"}); + $anvil->Job->update_progress({progress => 100, message => "error_0263"}); + $anvil->nice_exit({exit_code => 1}); +} + +# Are we a node or DR host? +$anvil->data->{sys}{host_type} = $anvil->Get->host_type(); +if (($anvil->data->{sys}{host_type} ne "node") && ($anvil->data->{sys}{host_type} ne "dr")) +{ + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0264"}); + $anvil->Job->update_progress({progress => 100, message => "error_0264"}); + $anvil->nice_exit({exit_code => 1}); +} + +### TODO: Add DR support. For now, this only works on Nodes in a cluster +if ($anvil->data->{sys}{host_type} eq "dr") +{ + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0265"}); + $anvil->Job->update_progress({progress => 100, message => "error_0265"}); + $anvil->nice_exit({exit_code => 1}); +} + +# Make sure that we're in an Anvil! system. 
+$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid(); +if (not $anvil->data->{sys}{anvil_uuid}) +{ + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"}); + $anvil->Job->update_progress({progress => 100, message => "error_0260"}); + $anvil->nice_exit({exit_code => 1}); +} + +# This is copied from anvil-boot-server, but it works here as well. We can't use 'pcs' without pacemaker +# being up. +wait_for_pacemaker($anvil); + +# If 'server' is 'all', shut down all servers. +if (lc($anvil->data->{switches}{'server'}) eq "all") +{ + shutdown_all_servers($anvil); +} +else +{ + my $wait = $anvil->data->{switches}{'no-wait'} ? 0 : 1; # A single server is waited on unless '--no-wait' is given. + shutdown_server($anvil, $anvil->data->{switches}{'server'}, $wait, 50); +} + +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"}); +$anvil->Job->update_progress({progress => 100, message => "job_0281"}); + +$anvil->nice_exit({exit_code => 0}); + + +############################################################################################################# +# Functions # +############################################################################################################# + +sub wait_for_pacemaker +{ + my ($anvil) = @_; + + # Loop until the CIB parses cleanly and this node reports itself ready; pcs can't be used before then. + my $waiting = 1; + while($waiting) + { + my $problem = $anvil->Cluster->parse_cib({debug => 2}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if (not $problem) + { + my $node_name = $anvil->data->{cib}{parsed}{'local'}{name}; + my $ready = $anvil->data->{cib}{parsed}{data}{node}{$node_name}{node_state}{ready}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { ready => $ready }}); + if ($ready) + { + # We're good. 
+ $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0279"}); + $anvil->Job->update_progress({progress => 15, message => "job_0279"}); + } + else + { + # Node isn't ready yet. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0278"}); + $anvil->Job->update_progress({progress => 10, message => "job_0278"}); + } + } + else + { + # Cluster hasn't started. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"}); + $anvil->Job->update_progress({progress => 5, message => "job_0277"}); + } + if ($waiting) + { + sleep 10; + } + } + + return(0); +} + +sub shutdown_server +{ + my ($anvil, $server, $wait, $progress) = @_; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server => $server, + 'wait' => $wait, + progress => $progress, + }}); + + # Is the server in the cluster? (The CIB was parsed in wait_for_pacemaker().) + if (not exists $anvil->data->{cib}{parsed}{data}{server}{$server}) + { + # Nope. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "log_0548", variables => { server => $server }}); # NOTE(review): anvil-boot-server logs 'log_0548' when a server is already running; confirm it is the intended message for "not in the cluster". + $anvil->Job->update_progress({progress => 100, message => "log_0548,!!server!".$server."!!"}); + $anvil->nice_exit({exit_code => 1}); + } + + my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { status => $status }}); + if ($status eq "off") + { + # It's off already, so there is nothing to do. Report it and return without error. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0284", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0284,!!server!".$server."!!"}); + return(0); + } + + # Now shut down. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0289", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0289,!!server!".$server."!!"}); + my $problem = $anvil->Cluster->shutdown_server({ + debug => 2, + server => $server, + 'wait' => $wait, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + # Failed, abort. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0268", variables => { server => $server }}); + $anvil->Job->update_progress({progress => 100, message => "error_0268,!!server!".$server."!!"}); + $anvil->nice_exit({exit_code => 1}); + } + else + { + if ($wait) + { + # Stopped! + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0285", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0285,!!server!".$server."!!"}); + } + else + { + # Stop requested. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0286", variables => { server => $server }}); + $anvil->Job->update_progress({progress => $progress, message => "job_0286,!!server!".$server."!!"}); + } + } + + return(0); +} + +sub shutdown_all_servers # Shuts down every server whose CIB status is not 'off'; always returns 0. +{ + my ($anvil) = @_; + + ### TODO: Manage the stop order here, inverse of boot order. + # Progress starts at 15 (where wait_for_pacemaker() leaves it) and can climb by at most 70, to 85. + my $server_count = keys %{$anvil->data->{cib}{parsed}{data}{server}}; + my $increment = $server_count ? int(70 / $server_count) : 0; # With no servers in the CIB, a bare division would be a fatal divide-by-zero. + my $percent = 15; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + server_count => $server_count, + increment => $increment, + }}); + foreach my $server (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{data}{server}}) + { + my $status = $anvil->data->{cib}{parsed}{data}{server}{$server}{status}; + my $host_name = $anvil->data->{cib}{parsed}{data}{server}{$server}{host_name}; + my $role = $anvil->data->{cib}{parsed}{data}{server}{$server}{role}; + my $active = $anvil->data->{cib}{parsed}{data}{server}{$server}{active}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:server' => $server, + 's2:status' => $status, + 's3:host_name' => $host_name, + 's4:role' => $role, + 's5:active' => $active, + }}); + + if ($status ne "off") + { + # Shut it down, waiting for it to stop only if '--wait' was given. + my $wait = $anvil->data->{switches}{'wait'} ? 1 : 0; + $percent += $increment; + shutdown_server($anvil, $server, $wait, $percent); + } + } + + return(0); +}