From 16c20ae69c80fa1c1569a00bd5696f4865090160 Mon Sep 17 00:00:00 2001 From: Digimer Date: Sat, 5 Jun 2021 14:32:26 -0400 Subject: [PATCH 1/3] * Updated Tools->catch_sig() to use return code 0 instead of 255 so that systemd doesn't think our daemons failed on stop. * Updated Cluster->parse_cib() to not require a database connection (part of the work to make ocf:alteeve:server run without a DB) * WIP: Continuing work on the ocf:alteeve:server RA to run without database connections. * Updated the scancore daemon to explicitly check that all scan agent schemas are loaded in all databases on startup. This is to resolve resync issues on rebuilt strikers that may not yet have some schemas loaded when a DB resync runs. Signed-off-by: Digimer --- Anvil/Tools.pm | 4 +- Anvil/Tools/Cluster.pm | 10 +- Anvil/Tools/Database.pm | 47 ++++----- ocf/alteeve/server | 205 ++++++++++++---------------------------- tools/scancore | 37 +++++++- 5 files changed, 125 insertions(+), 178 deletions(-) diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index a41a3797..4fbaa9db 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1355,7 +1355,9 @@ sub catch_sig $anvil->System->call({shell_call => $anvil->data->{path}{exe}{stty}." ".$anvil->data->{sys}{terminal}{stty}}); } } - $anvil->nice_exit({exit_code => 255}); + + # Exit with '0' so systemd doesn't think we failed when it shuts us down. + $anvil->nice_exit({exit_code => 0}); } diff --git a/Anvil/Tools/Cluster.pm b/Anvil/Tools/Cluster.pm index 74ec45ee..c9d4de06 100644 --- a/Anvil/Tools/Cluster.pm +++ b/Anvil/Tools/Cluster.pm @@ -3001,10 +3001,11 @@ sub parse_cib } else { - # It's our peer. + # It's our peer. Note that we only get the peer's host UUID if we have a DB + # connection. This method is called by ocf:alteeve:anvil which skips the DB. 
$anvil->data->{cib}{parsed}{peer}{ready} = $ready; $anvil->data->{cib}{parsed}{peer}{name} = $node_name; - $anvil->data->{cib}{parsed}{peer}{host_uuid} = $anvil->Get->host_uuid_from_name({debug => $debug, host_name => $node_name}); + $anvil->data->{cib}{parsed}{peer}{host_uuid} = $anvil->data->{sys}{database}{connections} ? $anvil->Get->host_uuid_from_name({debug => $debug, host_name => $node_name}) : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "cib::parsed::peer::ready" => $anvil->data->{cib}{parsed}{peer}{ready}, "cib::parsed::peer::name" => $anvil->data->{cib}{parsed}{peer}{name}, @@ -3183,8 +3184,9 @@ sub parse_cib # Stopping $status = $active ? "running" : "off"; - # If the role is NOT 'migating', check to see if it's marked as such in the database. - if ($role ne "migrating") + # If the role is NOT 'migrating', and we have a database connection, check to see if + # it's marked as such in the database. + if (($role ne "migrating") && ($anvil->data->{sys}{database}{connections})) { $anvil->Database->get_servers({debug => $debug}); my $anvil_uuid = $anvil->Cluster->get_anvil_uuid({debug => $debug}); diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index c7c6be96..8e1285c9 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -1732,15 +1732,6 @@ sub connect # } } - if ($sensitive) - { - # Return here. 
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "sys::database::connections" => $anvil->data->{sys}{database}{connections}, - }}); - return($anvil->data->{sys}{database}{connections}); - } - # Make sure my host UUID is valod $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::host_uuid" => $anvil->data->{sys}{host_uuid} }}); if ($anvil->data->{sys}{host_uuid} !~ /^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$/) @@ -1752,23 +1743,33 @@ sub connect $anvil->Database->disconnect({debug => $debug}); } + # If this is a time sensitive call, end here. + if ($sensitive) + { + # Return here. + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "sys::database::connections" => $anvil->data->{sys}{database}{connections}, + }}); + return($anvil->data->{sys}{database}{connections}); + } + # If we have a previous count and the new count is higher, resync. if (exists $anvil->data->{sys}{database}{last_db_count}) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::last_db_count" => $anvil->data->{sys}{database}{last_db_count}, "sys::database::connections" => $anvil->data->{sys}{database}{connections}, }}); if ($anvil->data->{sys}{database}{connections} > $anvil->data->{sys}{database}{last_db_count}) { $check_for_resync = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { check_for_resync => $check_for_resync }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { check_for_resync => $check_for_resync }}); } } # If we have a "last_db_count" and it's the lower than the current number of connections, check for a # resync. 
- $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::connections" => $anvil->data->{sys}{database}{connections}, check_for_resync => $check_for_resync, }}); @@ -1787,11 +1788,11 @@ sub connect # Hold if a lock has been requested. $anvil->Database->locking({debug => $debug}); - # Mark that we're not active. + # Mark that we're now active. $anvil->Database->mark_active({debug => $debug, set => 1}); # Sync the database, if needed. - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "sys::database::resync_needed" => $anvil->data->{sys}{database}{resync_needed}, check_for_resync => $check_for_resync, }}); @@ -1832,8 +1833,8 @@ sub disconnect # Clear locks and mark that we're done running. if (not $marked_inactive) { - $anvil->Database->mark_active({set => 0}); - $anvil->Database->locking({release => 1}); + $anvil->Database->mark_active({debug => $debug, set => 0}); + $anvil->Database->locking({debug => $debug, release => 1}); $marked_inactive = 1; } @@ -1846,7 +1847,7 @@ sub disconnect delete $anvil->data->{sys}{database}{timestamp}; delete $anvil->data->{sys}{database}{read_uuid}; delete $anvil->data->{sys}{database}{identifier}; - $anvil->Database->read({set => "delete"}); + $anvil->Database->read({debug => $debug, set => "delete"}); # Delete any database information (reconnects should re-read anvil.conf anyway). delete $anvil->data->{database}; @@ -15161,7 +15162,7 @@ sub resync_databases # Add the host column. 
$query = "INSERT INTO public.$table ($host_column, $uuid_column, ".$columns."modified_date) VALUES (".$anvil->Database->quote($anvil->data->{sys}{host_uuid}).", ".$anvil->Database->quote($row_uuid).", ".$values.$anvil->Database->quote($modified_date)."::timestamp AT TIME ZONE 'UTC');"; } - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0460", variables => { uuid => $anvil->data->{database}{$uuid}{host}, query => $query }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0460", variables => { uuid => $anvil->data->{database}{$uuid}{host}, query => $query }}); ### NOTE: After an archive operationg, a record can ### end up in the public schema while nothing @@ -15185,14 +15186,14 @@ sub resync_databases query => $query, }}); $query =~ s/INSERT INTO public./INSERT INTO history./; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); push @{$anvil->data->{db_resync}{$uuid}{history}{sql}}, $query; } else { # No problem, record the query in the array - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); push @{$anvil->data->{db_resync}{$uuid}{public}{sql}}, $query; } } # if not exists @@ -15240,7 +15241,7 @@ sub resync_databases # Add the host column. 
$query = "INSERT INTO history.$table ($host_column, $uuid_column, ".$columns."modified_date) VALUES (".$anvil->Database->quote($anvil->data->{sys}{host_uuid}).", ".$anvil->Database->quote($row_uuid).", ".$values.$anvil->Database->quote($modified_date)."::timestamp AT TIME ZONE 'UTC');"; } - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0460", variables => { uuid => $anvil->data->{database}{$uuid}{host}, query => $query }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0460", variables => { uuid => $anvil->data->{database}{$uuid}{host}, query => $query }}); # Now record the query in the array push @{$anvil->data->{db_resync}{$uuid}{history}{sql}}, $query; @@ -16519,7 +16520,7 @@ ORDER BY # { # # Resync needed. # my $difference = $anvil->data->{sys}{database}{table}{$table}{last_updated} - $anvil->data->{sys}{database}{table}{$table}{uuid}{$uuid}{last_updated}; -# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { +# $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { # "s1:difference" => $anvil->Convert->add_commas({number => $difference }), # "s2:sys::database::table::${table}::last_updated" => $anvil->data->{sys}{database}{table}{$table}{last_updated}, # "s3:sys::database::table::${table}::uuid::${uuid}::last_updated" => $anvil->data->{sys}{database}{table}{$table}{uuid}{$uuid}{last_updated}, @@ -16540,7 +16541,7 @@ ORDER BY { # Resync needed. 
my $difference = ($anvil->data->{sys}{database}{table}{$table}{row_count} - $anvil->data->{sys}{database}{table}{$table}{uuid}{$uuid}{row_count}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "s1:difference" => $anvil->Convert->add_commas({number => $difference }), "s2:sys::database::table::${table}::row_count" => $anvil->data->{sys}{database}{table}{$table}{row_count}, "s3:sys::database::table::${table}::uuid::${uuid}::row_count" => $anvil->data->{sys}{database}{table}{$table}{uuid}{$uuid}{row_count}, diff --git a/ocf/alteeve/server b/ocf/alteeve/server index 9948bfb2..4190d172 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -143,13 +143,23 @@ $anvil->data->{environment}{OCF_RESKEY_CRM_meta_stop_drbd_resources} = 0; # We're used by anvil-boot-server and anvil-stop-server. They don't set environment variables, but instead # use switches. Pick those up, if passed. -$anvil->data->{switches}{migrate_to} = ""; # Sets 'meta_migrate_target' -$anvil->data->{switches}{migrate_from} = ""; # Sets 'meta_migrate_source' When set without 'migrate_to', does a status check after migration -$anvil->data->{switches}{server} = ""; # Sets 'name'. -$anvil->data->{switches}{start} = ""; -$anvil->data->{switches}{stop} = ""; -$anvil->data->{switches}{monitor} = ""; +$anvil->data->{switches}{migrate_to} = ""; # Sets 'meta_migrate_target' +$anvil->data->{switches}{'migrate-to'} = ""; +$anvil->data->{switches}{migrate_from} = ""; # Sets 'meta_migrate_source' When set without 'migrate_to', does a status check after migration +$anvil->data->{switches}{'migrate-from'} = ""; +$anvil->data->{switches}{server} = ""; # Sets 'name'. 
+$anvil->data->{switches}{start} = ""; +$anvil->data->{switches}{stop} = ""; +$anvil->data->{switches}{monitor} = ""; $anvil->Get->switches(); +if (($anvil->data->{switches}{'migrate-to'}) && not ($anvil->data->{switches}{migrate_to})) +{ + $anvil->data->{switches}{migrate_to} = $anvil->data->{switches}{'migrate-to'}; +} +if (($anvil->data->{switches}{'migrate-from'}) && not ($anvil->data->{switches}{migrate_from})) +{ + $anvil->data->{switches}{migrate_from} = $anvil->data->{switches}{'migrate-from'}; +} $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "switches::migrate_to" => $anvil->data->{switches}{migrate_to}, "switches::migrate_from" => $anvil->data->{switches}{migrate_from}, @@ -161,13 +171,13 @@ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list # If we can connect to a database, we'll set/clear the 'migrating' flag during migrations. For timing reasons # we don't let the RA do resyncs. -$anvil->Database->connect({sensitive => 1}); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); -if (not $anvil->data->{sys}{database}{connections}) -{ - # No databases, - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "warning_0073"}); -} +# $anvil->Database->connect({sensitive => 1}); +# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +# if (not $anvil->data->{sys}{database}{connections}) +# { +# # No databases, +# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "warning_0073"}); +# } if ($anvil->data->{switches}{stop_drbd_resources}) { @@ -180,64 +190,24 @@ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level =cut Start: -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] 
environment::OCF_RESKEY_CRM_meta_name: [start] environment::OCF_RESKEY_CRM_meta_on_fail: [block] environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] environment::OCF_RESKEY_CRM_meta_timeout: [300000] environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_ROOT: [/usr/lib/ocf] -Monitor after start: +Monitor: environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] environment::OCF_RESKEY_CRM_meta_name: [monitor] environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] environment::OCF_RESKEY_CRM_meta_timeout: [20000] environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_ROOT: [/usr/lib/ocf] - -Monitor one minute later: - -environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] -environment::OCF_RESKEY_CRM_meta_name: [monitor] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] -environment::OCF_RESKEY_CRM_meta_timeout: [20000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_ROOT: [/usr/lib/ocf] Migrate from an-a02n01 to an-a02n02 -environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_migrate_source: [] 
-environment::OCF_RESKEY_CRM_meta_migrate_target: [] -environment::OCF_RESKEY_CRM_meta_name: [monitor] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] -environment::OCF_RESKEY_CRM_meta_timeout: [20000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_ROOT: [/usr/lib/ocf] -<2 seconds later> environment::OCF_RESKEY_CRM_meta_migrate_source: [an-a02n01] environment::OCF_RESKEY_CRM_meta_migrate_target: [an-a02n02] environment::OCF_RESKEY_CRM_meta_name: [migrate_to] @@ -247,120 +217,59 @@ environment::OCF_RESKEY_CRM_meta_record_pending: [true] environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] environment::OCF_RESKEY_CRM_meta_timeout: [86400000] environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_ROOT: [/usr/lib/ocf] +# Post migration on an-a02n01; stop is called: -Post migration on an-a02n01 - -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] -environment::OCF_RESKEY_CRM_meta_name: [stop] -environment::OCF_RESKEY_CRM_meta_on_fail: [block] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] -environment::OCF_RESKEY_CRM_meta_timeout: [86400000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::PCMK_debug: [0] - - -Post migration on an-a02n02 +Post migration on an-a02n02: environment::OCF_RESKEY_CRM_meta_migrate_source: [an-a02n01] environment::OCF_RESKEY_CRM_meta_migrate_target: [an-a02n02] environment::OCF_RESKEY_CRM_meta_name: [migrate_from] 
environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] environment::OCF_RESKEY_CRM_meta_timeout: [600000] environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_PROVIDER: [alteeve] -environment::OCF_RESOURCE_TYPE: [server] -environment::OCF_ROOT: [/usr/lib/ocf] -environment::PCMK_debug: [0] - -Checking server state after: [srv02-c8s-fujitsu] was migrated to this host. - -environment::OCF_EXIT_REASON_PREFIX: [ocf-exit-reason:] -environment::OCF_RA_VERSION_MAJOR: [1] -environment::OCF_RA_VERSION_MINOR: [0] -environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] -environment::OCF_RESKEY_CRM_meta_name: [monitor] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] -environment::OCF_RESKEY_CRM_meta_timeout: [20000] -environment::OCF_RESKEY_crm_feature_set: [3.7.1] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_PROVIDER: [alteeve] -environment::OCF_RESOURCE_TYPE: [server] -environment::OCF_ROOT: [/usr/lib/ocf] -environment::PCMK_debug: [0] - -Monitor on an-a02n02 after a minute +Checking server state after: [srv02-c8s-fujitsu] was migrated to an-a02n02; -environment::OCF_EXIT_REASON_PREFIX: [ocf-exit-reason:] -environment::OCF_RA_VERSION_MAJOR: [1] -environment::OCF_RA_VERSION_MINOR: [0] environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] environment::OCF_RESKEY_CRM_meta_name: 
[monitor] environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] environment::OCF_RESKEY_CRM_meta_timeout: [20000] -environment::OCF_RESKEY_crm_feature_set: [3.7.1] environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_PROVIDER: [alteeve] -environment::OCF_RESOURCE_TYPE: [server] -environment::OCF_ROOT: [/usr/lib/ocf] -environment::PCMK_debug: [0] +Stop server (on an-a02n02): -Stop server: - -environment::OCF_EXIT_REASON_PREFIX: [ocf-exit-reason:] -environment::OCF_RA_VERSION_MAJOR: [1] -environment::OCF_RA_VERSION_MINOR: [0] -environment::OCF_RESKEY_CRM_meta_migrate_source: [] -environment::OCF_RESKEY_CRM_meta_migrate_target: [] environment::OCF_RESKEY_CRM_meta_name: [stop] environment::OCF_RESKEY_CRM_meta_on_fail: [block] environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_record_pending: [] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] environment::OCF_RESKEY_CRM_meta_timeout: [86400000] -environment::OCF_RESKEY_crm_feature_set: [3.7.1] environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_INSTANCE: [srv02-c8s-fujitsu] -environment::OCF_RESOURCE_PROVIDER: [alteeve] -environment::OCF_RESOURCE_TYPE: [server] -environment::OCF_ROOT: [/usr/lib/ocf] -environment::PCMK_debug: [0] =cut foreach my $key (sort {$a cmp $b} keys %{$anvil->data->{environment}}) { - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { "environment::${key}" => $anvil->data->{environment}{$key}, }}); } +foreach my $key (sort {$a cmp $b} keys %{$anvil->data->{switches}}) +{ + $anvil->Log->variables({source => 
$THIS_FILE, line => __LINE__, level => 1, list => { + "switches::${key}" => $anvil->data->{switches}{$key}, + }}); +} # Set environment variables from switches, if otherwise not set. +if (($anvil->data->{switches}{server}) && (not $anvil->data->{environment}{OCF_RESKEY_name})) +{ + $anvil->data->{environment}{OCF_RESKEY_name} = $anvil->data->{switches}{server}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "environment::OCF_RESKEY_name" => $anvil->data->{environment}{OCF_RESKEY_name}, + }}); +} if (($anvil->data->{switches}{migrate_to}) && (not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target})) { $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_target} = $anvil->data->{switches}{migrate_to}; @@ -376,13 +285,6 @@ if (($anvil->data->{switches}{migrate_from}) && (not $anvil->data->{environment} "environment::OCF_RESKEY_CRM_meta_migrate_source" => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source}, }}); } -if (($anvil->data->{switches}{server}) && (not $anvil->data->{environment}{OCF_RESKEY_name})) -{ - $anvil->data->{environment}{OCF_RESKEY_name} = $anvil->data->{switches}{server}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - "environment::OCF_RESKEY_name" => $anvil->data->{environment}{OCF_RESKEY_name}, - }}); -} # This is for debugging. if (not $anvil->data->{switches}{monitor}) @@ -486,7 +388,8 @@ sub check_daemons { my ($anvil, $task) = @_; - my $problem = $anvil->Cluster->parse_cib(); + my $problem = $anvil->Cluster->parse_cib({debug => 3}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { problem => $problem }}); if ($problem) { # Pacemaker isn't running, or some other problem. Someone must have called this script @@ -498,7 +401,7 @@ sub check_daemons # Is the peer running? We'll use this to know whether to try and start daemons on the peer. 
my $peer_name = $anvil->data->{cib}{parsed}{peer}{name}; my $peer_ready = $anvil->data->{cib}{parsed}{peer}{ready}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { peer_name => $peer_name, peer_ready => $peer_ready, }}); @@ -513,7 +416,7 @@ sub check_daemons my $running_peer = 0; my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code, }}); @@ -522,7 +425,7 @@ sub check_daemons # It is stopped, start it.. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0482", variables => { daemon => $daemon }}); my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." start ".$daemon}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code, }}); @@ -532,7 +435,7 @@ sub check_daemons until ($running) { my ($output, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, return_code => $return_code, }}); @@ -588,7 +491,7 @@ sub check_daemons target => $peer_name, shell_call => $anvil->data->{path}{exe}{systemctl}." 
start ".$daemon, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, error => $error, return_code => $return_code, @@ -602,7 +505,7 @@ sub check_daemons target => $peer_name, shell_call => $anvil->data->{path}{exe}{systemctl}." status ".$daemon, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { output => $output, error => $error, return_code => $return_code, @@ -847,6 +750,14 @@ sub start_server my $server = $anvil->data->{environment}{OCF_RESKEY_name}; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0303", variables => { server => $server }}); + if ((not $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node}) && ($anvil->data->{switches}{target})) + { + $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node} = $anvil->data->{switches}{target}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "environment::OCF_RESKEY_CRM_meta_on_node" => $anvil->data->{environment}{OCF_RESKEY_CRM_meta_on_node}, + }}); + } + # Make sure things are sane. validate_all($anvil); diff --git a/tools/scancore b/tools/scancore index eaade962..a5bba1c1 100755 --- a/tools/scancore +++ b/tools/scancore @@ -229,7 +229,7 @@ sub prepare_for_run $anvil->Storage->read_config(); $anvil->Get->switches(); $anvil->Words->read(); - $anvil->Database->connect(); + $anvil->Database->connect({check_for_resync => 1}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0132"}); # See if the mail server needs to be updated. @@ -243,7 +243,9 @@ sub wait_for_database { my ($anvil) = @_; - $anvil->Database->connect({check_for_resync => 1}); + # Don't check for resync here as we may need to load agent schemas. 
We'll check for resync in the + # main loop. + $anvil->Database->connect({check_for_resync => 0}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { @@ -259,7 +261,7 @@ sub wait_for_database $anvil->_set_paths(); $anvil->_set_defaults(); $anvil->Storage->read_config(); - $anvil->Database->connect({check_for_resync => 1}); + $anvil->Database->connect({check_for_resync => 0}); if ($anvil->data->{sys}{database}{connections}) { # We're good @@ -323,6 +325,35 @@ sub startup_tasks $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, key => "log_0572"}); + # Make sure all agents schemas are loaded so that resyncs where a table on one DB doesn't exist on + # another, causing a fault. + $anvil->ScanCore->_scan_directory({directory => $anvil->data->{path}{directories}{scan_agents}}); + foreach my $scan_agent (sort {$a cmp $b} keys %{$anvil->data->{scancore}{agent}}) + { + my $schema_file = $anvil->data->{path}{directories}{scan_agents}."/".$scan_agent."/".$scan_agent.".sql"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + scan_agent => $scan_agent, + schema_file => $schema_file, + }}); + if (-e $schema_file) + { + my $tables = $anvil->Database->get_tables_from_schema({debug => 3, schema_file => $schema_file}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { tables => $tables }}); + + my $table_count = @{$tables}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { table_count => $table_count }}); + + # It's possible that some agents don't have a database (or use core database tables only) + if (@{$tables} > 0) + { + $anvil->Database->check_agent_data({ + agent => $scan_agent, + tables => $tables, + }); + } + } + } + # Update our status $anvil->Database->get_hosts({debug => 3}); my $host_uuid = $anvil->Get->host_uuid(); From 
e15c1651edb7f032233263a83c6f962e0dfa27f5 Mon Sep 17 00:00:00 2001 From: Digimer Date: Sat, 5 Jun 2021 19:07:25 -0400 Subject: [PATCH 2/3] * Fixed a bug with deleting bad keys where jobs to delete keys on non-dashboard machines weren't being assigned to the proper target machine. * Fixed a bug with anvil-manage-keys where a state_uuid entry recorded on one database may not be read from a machine reading from another database. Signed-off-by: Digimer --- cgi-bin/striker | 71 +++++++++++++++++++++++++++++------------ tools/anvil-manage-keys | 30 +++++++++++------ 2 files changed, 71 insertions(+), 30 deletions(-) diff --git a/cgi-bin/striker b/cgi-bin/striker index 9358960d..455b76f5 100755 --- a/cgi-bin/striker +++ b/cgi-bin/striker @@ -1282,13 +1282,13 @@ sub process_keys { my ($anvil) = @_; - ### NOTE: This doesn't update Striker (the Alteeve) stack yet, just the base OK. + ### NOTE: This doesn't update Striker (the Alteeve) stack yet, just the base OS. my $show_list = 1; $anvil->data->{cgi}{'delete'}{value} = "" if not defined $anvil->data->{cgi}{'delete'}{value}; if ($anvil->data->{cgi}{'delete'}{value}) { # Record the job! 
- my $job_data = ""; + my $job_data = {}; foreach my $key (sort {$a cmp $b} keys %{$anvil->data->{cgi}}) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { key => $key }}); @@ -1296,34 +1296,63 @@ sub process_keys { my $state_uuid = $1; $show_list = 0; - $job_data .= $state_uuid.","; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { state_uuid => $state_uuid, show_list => $show_list, - job_data => $job_data, + }}); + + my $query = "SELECT state_host_uuid FROM states WHERE state_uuid = ".$anvil->Database->quote($state_uuid).";"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + + my $host_uuid = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $host_uuid = "" if not defined $host_uuid; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_uuid => $host_uuid }}); + next if not $host_uuid; + + if (not exists $job_data->{$host_uuid}) + { + $job_data->{$host_uuid} = ""; + } + $job_data->{$host_uuid} .= $state_uuid.","; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "job_data->{$host_uuid}" => $job_data->{$host_uuid}, }}); } } - $job_data =~ s/,$//; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_data => $job_data }}); - if ($job_data) + my $show_template = 0; + foreach my $host_uuid (keys %{$job_data}) { - my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ - file => $THIS_FILE, - line => __LINE__, - job_command => $anvil->data->{path}{exe}{'anvil-manage-keys'}.$anvil->Log->switches, - job_data => $job_data, - job_name => "manage::broken_keys", - job_title => "job_0056", - job_description => "job_0057", - job_progress => 0, - job_host_uuid => $anvil->data->{sys}{host_uuid}, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list 
=> { job_uuid => $job_uuid }}); + $job_data->{$host_uuid} =~ s/,$//; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "job_data->{$host_uuid}" => $job_data->{$host_uuid}, + }}); - # We don't need to store anything as hidden variables, we'll read it back from the database - # later. + if ($job_data->{$host_uuid}) + { + $show_template = 1; + my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ + file => $THIS_FILE, + line => __LINE__, + job_command => $anvil->data->{path}{exe}{'anvil-manage-keys'}.$anvil->Log->switches, + job_data => $job_data->{$host_uuid}, + job_name => "manage::broken_keys", + job_title => "job_0056", + job_description => "job_0057", + job_progress => 0, + job_host_uuid => $host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + show_template => $show_template, + job_uuid => $job_uuid, + }}); + } + } + + if ($show_template) + { + # We don't need to store anything as hidden variables, we'll read it back from the + # database later. $anvil->data->{form}{body} = $anvil->Template->get({file => "striker.html", name => "job recorded", variables => { title_id => "", message_id => "", diff --git a/tools/anvil-manage-keys b/tools/anvil-manage-keys index e091a965..d9b7e752 100755 --- a/tools/anvil-manage-keys +++ b/tools/anvil-manage-keys @@ -41,8 +41,8 @@ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list 'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'}, }}); -$anvil->Database->connect(); -$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); +$anvil->Database->connect({check_for_resync => 1}); +$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); if (not $anvil->data->{sys}{database}{connections}) { # No databases, update the job, sleep for a bit and then exit. 
The daemon will pick it up and try @@ -85,13 +85,25 @@ WHERE state_uuid = ".$anvil->Database->quote($state_uuid)." ;"; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); - my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); - my $count = @{$results}; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - results => $results, - count => $count, - }}); - if (not $count) + + # States aren't sync'ed, so we may need to check both/all DBs to find our data. + my $state_found = 0; + my $results = []; + foreach my $uuid (keys %{$anvil->data->{cache}{database_handle}}) + { + $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + if ($count) + { + $state_found = 1; + last; + } + } + if (not $state_found) { # No bad keys found on this host. $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0078"}); From 96fffb0b9621cd32c3ca2fdf096434cd9cdc0d67 Mon Sep 17 00:00:00 2001 From: Digimer Date: Sun, 6 Jun 2021 00:01:11 -0400 Subject: [PATCH 3/3] * Finished updating ocf:alteeve:server to no longer require a database connection. To do this, and still be able to track live migration times, the Server->migrate_virsh() method now writes out the server name and migration time to a /tmp/anvil/migration-duration.. file. This file is checked for by the scan-server resource agent and, when found, is parsed and the migration duration is recorded, then the file is purged. * Updated anvil-daemon to have a new function called "handle_special_cases" called during startup that does any weird bug mitigation required. 
For now, this is used to mitigate against rhbz#1961562, though certainly it will be used for other reasons later. Signed-off-by: Digimer --- Anvil/Tools/Server.pm | 110 ++++++++++++++---------- ocf/alteeve/server | 90 +++---------------- scancore-agents/scan-server/scan-server | 66 ++++++++++++++ tools/anvil-daemon | 36 +++++++- 4 files changed, 177 insertions(+), 125 deletions(-) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index 6a9b0b0d..14e5aec6 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -983,8 +983,8 @@ sub migrate_virsh }); } - ### NOTE: This method is called by ocf:alteeve:server, which is allowed to operate without database - ### access. As such, queries need to be run only if we've got one or more DB connections. + ### NOTE: This method is called by ocf:alteeve:server, which operates without database access. As + ### such, queries need to be run only if we've got one or more DB connections. # Mark this server as being in a migration state. if ($anvil->data->{sys}{database}{connections}) { @@ -1113,19 +1113,22 @@ WHERE $success = 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { success => $success }}); - # Revert the server state and update the server host. - my $server_host_uuid = $anvil->Get->host_uuid_from_name({debug => $debug, host_name => $target}); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_host_uuid => $server_host_uuid }}); - if (not $server_host_uuid) + # Update the server state, if we have a database connection. + if ($anvil->data->{sys}{database}{connections}) { - # We didn't find the target's host_uuid, so use the old one and let scan-server - # handle it. - $server_host_uuid = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_host_uuid}; + # Revert the server state and update the server host. 
+ my $server_host_uuid = $anvil->Get->host_uuid_from_name({debug => $debug, host_name => $target}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_host_uuid => $server_host_uuid }}); - } - if (($server_uuid) && ($anvil->data->{sys}{database}{connections})) - { - my $query = " + if (not $server_host_uuid) + { + # We didn't find the target's host_uuid, so use the old one and let scan-server + # handle it. + $server_host_uuid = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_host_uuid}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { server_host_uuid => $server_host_uuid }}); + } + if ($server_uuid) + { + my $query = " UPDATE servers SET @@ -1135,41 +1138,54 @@ SET WHERE server_uuid = ".$anvil->Database->quote($server_uuid)." ;"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); - $anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__}); - $anvil->Database->insert_or_update_servers({ - debug => $debug, - server_uuid => $server_uuid, - server_name => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_name}, - server_anvil_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_anvil_uuid}, - server_user_stop => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_user_stop}, - server_start_after_server_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_after_server_uuid}, - server_start_delay => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_delay}, - server_host_uuid => $server_host_uuid, - server_state => $old_server_state, - server_live_migration => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_live_migration}, - server_pre_migration_file_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_pre_migration_file_uuid}, - server_pre_migration_arguments => 
$anvil->data->{servers}{server_uuid}{$server_uuid}{server_pre_migration_arguments}, - server_post_migration_file_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_post_migration_file_uuid}, - server_post_migration_arguments => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_post_migration_arguments}, - server_ram_in_use => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_ram_in_use}, - server_configured_ram => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_configured_ram}, - server_updated_by_user => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_updated_by_user}, - server_boot_time => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_boot_time}, - }); - - # Record the migration time. - my ($variable_uuid) = $anvil->Database->insert_or_update_variables({ - file => $THIS_FILE, - line => __LINE__, - variable_name => "server::migration_duration", - variable_value => $migration_took, - variable_default => "", - variable_description => "message_0236", - variable_section => "servers", - variable_source_uuid => $server_uuid, - variable_source_table => "servers", + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { query => $query }}); + $anvil->Database->write({query => $query, source => $THIS_FILE, line => __LINE__}); + $anvil->Database->insert_or_update_servers({ + debug => $debug, + server_uuid => $server_uuid, + server_name => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_name}, + server_anvil_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_anvil_uuid}, + server_user_stop => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_user_stop}, + server_start_after_server_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_after_server_uuid}, + server_start_delay => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_start_delay}, + server_host_uuid => $server_host_uuid, + server_state => $old_server_state, + 
server_live_migration => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_live_migration}, + server_pre_migration_file_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_pre_migration_file_uuid}, + server_pre_migration_arguments => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_pre_migration_arguments}, + server_post_migration_file_uuid => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_post_migration_file_uuid}, + server_post_migration_arguments => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_post_migration_arguments}, + server_ram_in_use => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_ram_in_use}, + server_configured_ram => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_configured_ram}, + server_updated_by_user => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_updated_by_user}, + server_boot_time => $anvil->data->{servers}{server_uuid}{$server_uuid}{server_boot_time}, + }); + + # Record the migration time. + my ($variable_uuid) = $anvil->Database->insert_or_update_variables({ + file => $THIS_FILE, + line => __LINE__, + variable_name => "server::migration_duration", + variable_value => $migration_took, + variable_default => "", + variable_description => "message_0236", + variable_section => "servers", + variable_source_uuid => $server_uuid, + variable_source_table => "servers", + }); + } + } + else + { + # There's no database, so write the migration time to a temp file. 
+ my $body = "server_name=".$server.",migration_took=".$migration_took."\n"; + my $file = "/tmp/anvil/migration-duration.".$server.".".time; + my ($failed) = $anvil->Storage->write_file({ + file => $file, + body => $body, + mode => "0666", }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { failed => $failed }}); } } diff --git a/ocf/alteeve/server b/ocf/alteeve/server index 4190d172..d21b0dae 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -8,6 +8,10 @@ # WARNING: This is a pretty purpose-specific resource agent. No effort was made to test this on an rgmanager # cluster or on any configuration outside how the Anvil! m3 uses it. If you plan to adapt it to # another purpose, let us know and we'll try to help. +# +# NOTE: This method, for the sake of speed and reliability, does not connect to the Anvil! database. If you +# do work on this RA, be sure that a check is made for database connections before SQL calls are made +# in module methods. # # Based on: https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc # @@ -169,16 +173,6 @@ $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list "switches::monitor" => $anvil->data->{switches}{monitor}, }}); -# If we can connect to a database, we'll set/clear the 'migrating' flag during migrations. For timing reasons -# we don't let the RA do resyncs. 
-# $anvil->Database->connect({sensitive => 1}); -# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0132"}); -# if (not $anvil->data->{sys}{database}{connections}) -# { -# # No databases, -# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "warning_0073"}); -# } - if ($anvil->data->{switches}{stop_drbd_resources}) { $anvil->data->{environment}{OCF_RESKEY_CRM_meta_migrate_source} = 1; @@ -187,65 +181,19 @@ if ($anvil->data->{switches}{stop_drbd_resources}) # Something for the logs $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 2, key => "log_0298"}); -=cut -Start: - -environment::OCF_RESKEY_CRM_meta_name: [start] -environment::OCF_RESKEY_CRM_meta_on_fail: [block] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_timeout: [300000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] - -Monitor: - -environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_name: [monitor] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_timeout: [20000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] - -Migrate from an-a02n01 to an-a02n02 - -environment::OCF_RESKEY_CRM_meta_migrate_source: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_migrate_target: [an-a02n02] -environment::OCF_RESKEY_CRM_meta_name: [migrate_to] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [1] -environment::OCF_RESKEY_CRM_meta_record_pending: [true] -environment::OCF_RESKEY_CRM_meta_stop_drbd_resources: [0] -environment::OCF_RESKEY_CRM_meta_timeout: [86400000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] -# Post migration on an-a02n01; stop is called: - -Post migration on an-a02n02: - 
-environment::OCF_RESKEY_CRM_meta_migrate_source: [an-a02n01] -environment::OCF_RESKEY_CRM_meta_migrate_target: [an-a02n02] -environment::OCF_RESKEY_CRM_meta_name: [migrate_from] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_timeout: [600000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] +=cut Manual calls; -Checking server state after: [srv02-c8s-fujitsu] was migrated to an-a02n02; +# Start a server; +/usr/lib/ocf/resource.d/alteeve/server -vv --log-secure --server --start -environment::OCF_RESKEY_CRM_meta_interval: [60000] -environment::OCF_RESKEY_CRM_meta_name: [monitor] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_timeout: [20000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] +# Stop a server +/usr/lib/ocf/resource.d/alteeve/server -vv --log-secure --server --stop -Stop server (on an-a02n02): +# Monitor a server +/usr/lib/ocf/resource.d/alteeve/server -vv --log-secure --server --monitor -environment::OCF_RESKEY_CRM_meta_name: [stop] -environment::OCF_RESKEY_CRM_meta_on_fail: [block] -environment::OCF_RESKEY_CRM_meta_on_node: [an-a02n02] -environment::OCF_RESKEY_CRM_meta_on_node_uuid: [2] -environment::OCF_RESKEY_CRM_meta_timeout: [86400000] -environment::OCF_RESKEY_name: [srv02-c8s-fujitsu] +# Migrate (run on current host) +/usr/lib/ocf/resource.d/alteeve/server -vv --log-secure --server --migrate-to --migrate-from =cut @@ -1273,23 +1221,11 @@ pmsuspended - The domain has been suspended by guest power management, e.g. ente $anvil->nice_exit({exit_code => 1}); } -### TODO: Write the migration duration to /tmp/anvil.migration..data and have 'anvil-migrate-server' read that in to update the DB. 
# Migrate the server sub migrate_server { my ($anvil) = @_; - ### This requires a database - # If we can connect to a database, we'll set/clear the 'migrating' flag during migrations. For timing - # reasons we don't let the RA do resyncs. -# $anvil->Database->connect({sensitive => 1}); -# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, secure => 0, key => "log_0132"}); -# if (not $anvil->data->{sys}{database}{connections}) -# { -# # No databases, exit. -# $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, secure => 0, key => "error_0003"}); -# return(1); -# } ### NOTE: For now, we're not going to block if the target is not UpToDate. There are times when a ### user might want to do this (ie: sync will be done soon and the need to evacuate the node diff --git a/scancore-agents/scan-server/scan-server b/scancore-agents/scan-server/scan-server index aa2ac12f..b202ec85 100755 --- a/scancore-agents/scan-server/scan-server +++ b/scancore-agents/scan-server/scan-server @@ -85,6 +85,9 @@ if ($host_type eq "striker") # This is more than data collection in most agents, as it actually handles the changes on the fly collect_data($anvil); +# Look for migration times written out by ocf:alteeve:server. +record_migration_times($anvil); + # Mark that we ran. $anvil->Database->insert_or_update_updated({updated_by => $THIS_FILE}); @@ -94,6 +97,69 @@ $anvil->nice_exit({exit_code => 0}); # Functions # ############################################################################################################# +# Look for migration times written out by ocf:alteeve:server. 
# Scans '/tmp/anvil' for files whose names start with 'migration-duration.'. Each such file is expected to 
# hold a single line in the format 'server_name=<name>,migration_took=<value>'. When the server name resolves
# to a server UUID and the duration is set, the duration is stored as the 'server::migration_duration' 
# variable against that server. Every matching file is removed after it is read, whether or not it could be
# parsed, so that stale or malformed files don't accumulate.
# 
# Parameters;
#   $anvil - The Anvil::Tools object.
# 
# Returns '0' in all cases.
sub record_migration_times
{
	my ($anvil) = @_;
	
	my $directory = "/tmp/anvil";
	if (not -d $directory)
	{
		# Nothing has been written out, so there is nothing to record.
		return(0);
	}
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { directory => $directory }});
	
	# Use a lexical handle and check the open, so a failure is logged instead of silently reading from an
	# unopened handle.
	my $directory_handle;
	if (not opendir($directory_handle, $directory))
	{
		my $error = "$!";
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 
			directory => $directory, 
			error     => $error,
		}});
		return(0);
	}
	
	while (defined(my $file = readdir($directory_handle)))
	{
		# This also skips '.' and '..'.
		next if $file !~ /^migration-duration\./;
		my $full_path = $directory."/".$file;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			file      => $file,
			full_path => $full_path, 
		}});
		
		# The directory lives under /tmp, so only read plain files; Don't follow symlinks or try to 
		# read sub-directories.
		next if -l $full_path;
		next if not -f $full_path;
		
		my $body = $anvil->Storage->read_file({file => $full_path});
		   $body = "" if not defined $body;
		   $body =~ s/\n//;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { body => $body }});
		if ($body =~ /server_name=(.*?),migration_took=(.*?)$/)
		{
			my $server_name    = $1;
			my $migration_took = $2;
			my $anvil_uuid     = $anvil->Cluster->get_anvil_uuid;
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				server_name    => $server_name,
				migration_took => $migration_took, 
				anvil_uuid     => $anvil_uuid, 
			}});
			my $server_uuid = $anvil->Get->server_uuid_from_name({
				server_name => $server_name, 
				anvil_uuid  => $anvil_uuid,
			});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { server_uuid => $server_uuid }});
			if (($server_uuid) && ($migration_took))
			{
				my ($variable_uuid) = $anvil->Database->insert_or_update_variables({
					file                  => $THIS_FILE, 
					line                  => __LINE__, 
					variable_name         => "server::migration_duration", 
					variable_value        => $migration_took, 
					variable_default      => "", 
					variable_description  => "message_0236", 
					variable_section      => "servers", 
					variable_source_uuid  => $server_uuid, 
					variable_source_table => "servers", 
				});
				$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { variable_uuid => $variable_uuid }});
			}
		}
		
		# Purge the file. If it can't be removed, the same duration would be recorded again on every 
		# pass, so make the failure visible in the logs.
		if (not unlink $full_path)
		{
			my $error = "$!";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 1, list => { 
				full_path => $full_path, 
				error     => $error,
			}});
		}
	}
	closedir($directory_handle);
	
	return(0);
}

+ # This reads in all the data we can find about servers running locally. This is more than data collection in # most agents, as it actually handles the changes on the fly. sub collect_data diff --git a/tools/anvil-daemon b/tools/anvil-daemon index 0c6c79ad..2aa3c8f5 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -946,6 +946,9 @@ AND # Make sure /etc/hosts is updated. $anvil->System->update_hosts(); + # This handles weird bits for things like bug work-arounds. + handle_special_cases($anvil); + # Now look for jobs that have a job status of 'scancore_startup' run_jobs($anvil, 1); @@ -963,6 +966,37 @@ AND return(0); } +# This handles weird bits for things like bug work-arounds. +sub handle_special_cases +{ + my ($anvil) = @_; + + # RHBZ #1961562 - https://bugzilla.redhat.com/show_bug.cgi?id=1961562#c16 + my $host_type = $anvil->Get->host_type(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }}); + if ($host_type ne "striker") + { + # We're a node or DR host. We need to touch this file. + my $work_around_file = "/etc/qemu/firmware/50-edk2-ovmf-cc.json"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { work_around_file => $work_around_file }}); + if (not -e $work_around_file) + { + $anvil->Storage->write_file({ + debug => 2, + file => $work_around_file, + body => "", + overwrite => 0, + backup => 0, + mode => "0644", + user => "root", + group => "root", + }); + } + } + + return(0); +} + # Configure the local database, if needed. sub prep_database { @@ -1274,7 +1308,7 @@ sub run_jobs backup => 0, mode => "0644", user => "apache", - group => "apache" + group => "apache", }); return(0);