From a034583213ddb019e55e7132b9e546bf5189f677 Mon Sep 17 00:00:00 2001 From: Digimer Date: Mon, 20 Sep 2021 22:34:36 -0400 Subject: [PATCH 1/8] * Updated DRBD->gather_data() to record TCP/IP data between connections of two hosts. * Updated anvil-manage-dr to use the TCP ports already configured for a resource when re-configuring a DR resource that has been previously configured. Signed-off-by: Digimer --- Anvil/Tools/DRBD.pm | 11 +++++++++++ Anvil/Tools/Database.pm | 2 +- tools/anvil-manage-dr | 41 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/Anvil/Tools/DRBD.pm b/Anvil/Tools/DRBD.pm index c34a687d..263579b9 100644 --- a/Anvil/Tools/DRBD.pm +++ b/Anvil/Tools/DRBD.pm @@ -761,6 +761,17 @@ sub gather_data host2_ip_address => $host2_ip_address, host2_tcp_port => $host2_tcp_port, }}); + + $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host1_ip_address} = $host1_ip_address; + $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host1_tcp_port} = $host1_tcp_port; + $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host2_ip_address} = $host2_ip_address; + $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host2_tcp_port} = $host2_tcp_port; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + "s1:new::resource::${resource}::host1_to_host2::${host1_name}::${host2_name}::host1_ip_address" => $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host1_ip_address}, + "s2:new::resource::${resource}::host1_to_host2::${host1_name}::${host2_name}::host1_tcp_port" => $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host1_tcp_port}, + "s3:new::resource::${resource}::host1_to_host2::${host1_name}::${host2_name}::host2_ip_address" => 
$anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host2_ip_address}, + "s4:new::resource::${resource}::host1_to_host2::${host1_name}::${host2_name}::host2_tcp_port" => $anvil->data->{new}{resource}{$resource}{host1_to_host2}{$host1_name}{$host2_name}{host2_tcp_port}, + }}); } # $peer = $this_host_name; diff --git a/Anvil/Tools/Database.pm b/Anvil/Tools/Database.pm index 5ed17f2f..5f473fb6 100644 --- a/Anvil/Tools/Database.pm +++ b/Anvil/Tools/Database.pm @@ -1657,7 +1657,7 @@ sub connect } # Before we try to connect, see if this is a local database and, if so, make sure it's setup. - my $is_local = $anvil->Network->is_local({debug => 2, host => $host}); + my $is_local = $anvil->Network->is_local({debug => $debug, host => $host}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { is_local => $is_local }}); if ($is_local) { diff --git a/tools/anvil-manage-dr b/tools/anvil-manage-dr index e84de9df..b3f13af5 100755 --- a/tools/anvil-manage-dr +++ b/tools/anvil-manage-dr @@ -723,12 +723,49 @@ sub process_protect $anvil->nice_exit({exit_code => 1}); } - # Get net next pair of TCP ports. + # Have we already configured the DR? If so, what ports are already allocated. 
+ my $node1_to_dr_port = ""; + my $node2_to_dr_port = ""; + foreach my $host1_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{host1_to_host2}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host1_name => $host1_name }}); + foreach my $host2_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{host1_to_host2}{$host1_name}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host2_name => $host2_name }}); + next if (($host1_name ne $dr1_short_host_name) && ($host2_name ne $dr1_short_host_name)); + if (($host1_name eq $node1_short_host_name) or ($host2_name eq $node1_short_host_name)) + { + $node1_to_dr_port = $anvil->data->{new}{resource}{$server_name}{host1_to_host2}{$host1_name}{$host2_name}{host1_tcp_port}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node1_to_dr_port => $node1_to_dr_port }}); + } + else + { + $node2_to_dr_port = $anvil->data->{new}{resource}{$server_name}{host1_to_host2}{$host1_name}{$host2_name}{host1_tcp_port}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node2_to_dr_port => $node2_to_dr_port }}); + } + } + } + + # Get the next pair of TCP ports, if needed. 
my (undef, $tcp_ports) = $anvil->DRBD->get_next_resource({ debug => 2, dr_tcp_ports => 1, }); - my ($node1_to_dr_port, $node2_to_dr_port) = split/,/, $tcp_ports; + my ($first_port, $second_port) = split/,/, $tcp_ports; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + "s01:first_port" => $first_port, + "s02:second_port" => $second_port, + }}); + if ($node1_to_dr_port eq "") + { + $node1_to_dr_port = $first_port; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node1_to_dr_port => $node1_to_dr_port }}); + } + if ($node2_to_dr_port eq "") + { + $node2_to_dr_port = $second_port; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { node2_to_dr_port => $node2_to_dr_port }}); + } # Show what we're doing my $variables = { From 5ee7b2ccaf6cf05c882508f04c2a63316120f80a Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 21 Sep 2021 15:14:46 -0400 Subject: [PATCH 2/8] Got the '--connect' and '--disconnect' functions working in anvil-manage-dr. Signed-off-by: Digimer --- share/words.xml | 16 ++ tools/anvil-manage-dr | 559 +++++++++++++++++++++++++++++++++++------- 2 files changed, 482 insertions(+), 93 deletions(-) diff --git a/share/words.xml b/share/words.xml index cd618daf..8c20d726 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1267,6 +1267,22 @@ It will take time for it to initialize, please be patient. The job has been recorded with the UUID: [#!variable!job_uuid!#], it will start in just a moment if anvil-daemon is running. Manage DR tasks for a given server This job can protect, remove (unprotect), connect, disconnect or update (connect, sync, disconnect) a given server. + +Do you want to connect the DR host for the server: [#!variable!server!#]? +Note: Depending on the disk write load and storage network speed to the DR host, + this could cause reduced disk write performance. 
+ + About to connect the DR resource for the server: [#!variable!server!#]. + Brought up the connection locally. Now checking that the resource is up on the nodes. + Making sure the resource is up on: [#!variable!host_name!#]. + Waiting now for the our resource to connect. + Done! The server: [#!variable!server!#] is now connected. + +Do you want to disconnect the DR host for the server: [#!variable!server!#]? +Note: Once down, no further changes will be written to the DR host. + + About to disconnect the DR resource for the server: [#!variable!server!#]. + Done! The server: [#!variable!server!#] is now disconnected. Starting: [#!variable!program!#]. diff --git a/tools/anvil-manage-dr b/tools/anvil-manage-dr index b3f13af5..a669b3f6 100755 --- a/tools/anvil-manage-dr +++ b/tools/anvil-manage-dr @@ -126,16 +126,73 @@ sub sanity_check anvil_uuid => $anvil_uuid, }}); - if (($host_type ne "node") or (not $anvil_uuid)) + # If we're doing a --protect or --remove, make sure we're a node, the cluster is up, and both nodes + # are ready. + if (($anvil->data->{switches}{protect}) or ($anvil->data->{switches}{remove})) { - # This must be run on a node active in the cluster hosting the server being managed. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0332"}); - $anvil->Job->update_progress({ - progress => 100, - message => "error_0332", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); + if (($host_type ne "node") or (not $anvil_uuid)) + { + # This must be run on a node active in the cluster hosting the server being managed. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0332"}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0332", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } + + # Can we parse the CIB? 
+ my ($problem) = $anvil->Cluster->parse_cib(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); + if ($problem) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0336"}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0336", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + $anvil->nice_exit({exit_code => 1}); + } + + # Both nodes need to be in the cluster, are they? + if (not $anvil->data->{cib}{parsed}{'local'}{ready}) + { + # We're not a full member of the cluster yet. Please try again once we're fully in. Exiting. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0337"}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0337", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } + + if ($anvil->data->{switches}{protect}) + { + if (not $anvil->data->{switches}{protocol}) + { + $anvil->data->{switches}{protocol} = "async"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 'switches::protocol' => $anvil->data->{switches}{protocol}, + }}); + } + elsif (($anvil->data->{switches}{protocol} ne "sync") && + ($anvil->data->{switches}{protocol} ne "async") && + ($anvil->data->{switches}{protocol} ne "long-throw")) + { + # The protocol is invalid. Please use '--help' for more information. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0342", variables => { protocol => $anvil->data->{switches}{protocol} }}); + $anvil->Job->update_progress({ + progress => 100, + message => "error_0341,!!protocol!".$anvil->data->{switches}{protocol}."!!", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } + } } # Get the Anvil! details. 
@@ -156,7 +213,7 @@ sub sanity_check $anvil->nice_exit({exit_code => 1}); } - # Can we access DR? + # Can we access DR, if we're not the DR host? my $password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name}; @@ -164,71 +221,46 @@ sub sanity_check debug => 2, host => $dr1_host_name, }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - password => $anvil->Log->is_secure($password), - dr1_host_uuid => $dr1_host_uuid, - dr1_host_name => $dr1_host_name, - dr_ip => $dr_ip, - }}); - if ((not $dr_ip) or ($dr_ip eq "!!error!!")) - { - # Failed to find an IP we can access the DR host. Has it been configured? Is it running? Exiting. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "error_0334", variables => { host_name => $dr1_host_name }}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0334,!!host_name!".$dr1_host_name."!!", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); - } - - # Test access. - my $access = $anvil->Remote->test_access({ - target => $dr_ip, - password => $password, - }); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }}); - if (not $access) + if ($dr1_host_uuid ne $anvil->Get->host_uuid) { - # Failed to access the DR host. Is it running? Exiting. 
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0335", variables => { - host_name => $dr1_host_name, - ip_address => $dr_ip, + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + password => $anvil->Log->is_secure($password), + dr1_host_uuid => $dr1_host_uuid, + dr1_host_name => $dr1_host_name, + dr_ip => $dr_ip, }}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0335,!!host_name!".$dr1_host_name."!!,!!ip_address!".$dr_ip."!!", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); - } - - # Can we parse the CIB? - my ($problem) = $anvil->Cluster->parse_cib(); - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }}); - if ($problem) - { - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0336"}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0336", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); - $anvil->nice_exit({exit_code => 1}); - } - - # Both nodes need to be in the cluster, are they? - if (not $anvil->data->{cib}{parsed}{'local'}{ready}) - { - # We're not a full member of the cluster yet. Please try again once we're fully in. Exiting. - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0337"}); - $anvil->Job->update_progress({ - progress => 0, - message => "error_0337", - job_status => "failed", + if ((not $dr_ip) or ($dr_ip eq "!!error!!")) + { + # Failed to find an IP we can access the DR host. Has it been configured? Is it running? Exiting. 
+ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "error_0334", variables => { host_name => $dr1_host_name }}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0334,!!host_name!".$dr1_host_name."!!", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } + + # Test access. + my $access = $anvil->Remote->test_access({ + target => $dr_ip, + password => $password, }); - $anvil->nice_exit({exit_code => 1}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }}); + if (not $access) + { + # Failed to access the DR host. Is it running? Exiting. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0335", variables => { + host_name => $dr1_host_name, + ip_address => $dr_ip, + }}); + $anvil->Job->update_progress({ + progress => 0, + message => "error_0335,!!host_name!".$dr1_host_name."!!,!!ip_address!".$dr_ip."!!", + job_status => "failed", + }); + $anvil->nice_exit({exit_code => 1}); + } } ### TODO: We can queue a job to update the peer later, there's no real need, in the long run, for the @@ -318,27 +350,6 @@ sub sanity_check $anvil->nice_exit({exit_code => 1}); } - if (not $anvil->data->{switches}{protocol}) - { - $anvil->data->{switches}{protocol} = "async"; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { - 'switches::protocol' => $anvil->data->{switches}{protocol}, - }}); - } - elsif (($anvil->data->{switches}{protocol} ne "sync") && - ($anvil->data->{switches}{protocol} ne "async") && - ($anvil->data->{switches}{protocol} ne "long-throw")) - { - # The protocol is invalid. Please use '--help' for more information. 
- $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "error_0342", variables => { protocol => $anvil->data->{switches}{protocol} }}); - $anvil->Job->update_progress({ - progress => 100, - message => "error_0341,!!protocol!".$anvil->data->{switches}{protocol}."!!", - job_status => "failed", - }); - $anvil->nice_exit({exit_code => 1}); - } - # Are we being asked to actuall do something? if (((not $anvil->data->{switches}{'connect'}) && (not $anvil->data->{switches}{disconnect}) && @@ -431,6 +442,10 @@ Options (all require --server ); This removes the DR image from the DR host for the server, freeing up space on DR but removing the protection afforded by DR. + --server + + This is the name or UUID of the server being worked on. + --update This tells the DR to be connected and sync, Once the volume(s) on DR are 'UpToDate', the connection @@ -465,6 +480,364 @@ Exiting. { process_protect($anvil, $terminal); } + elsif ($anvil->data->{switches}{'connect'}) + { + process_connect($anvil, $terminal); + } + elsif ($anvil->data->{switches}{disconnect}) + { + process_disconnect($anvil, $terminal); + } + + return(0); +} + +sub process_disconnect +{ + my ($anvil, $terminal) = @_; + + # Parse out the DRBD resource's backing the server and get their LV sizes. 
+ $anvil->Database->get_server_definitions(); + my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); + my $anvil_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name}; + my $node1_short_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{short_host_name}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name}; + my $node2_short_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{short_host_name}; + my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name}; + my $dr1_short_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{short_host_name}; + my $server_name = $anvil->data->{server}{'server-name'}; + my $server_uuid = $anvil->data->{server}{'server-uuid'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + anvil_uuid => $anvil_uuid, + anvil_password => $anvil->Log->is_secure($anvil_password), + node1_host_uuid => $node1_host_uuid, + node1_host_name => $node1_host_name, + node1_short_host_name => $node1_short_host_name, + node2_host_uuid => $node2_host_uuid, + node2_host_name => $node2_host_name, + node2_short_host_name => $node2_short_host_name, + dr1_host_uuid => $dr1_host_uuid, + dr1_host_name => $dr1_host_name, + dr1_short_host_name => $dr1_short_host_name, + server_name => $server_name, + server_uuid => $server_uuid, + }}); + + ### NOTE: 'Yes' is set when a job is picked up, so this won't re-register the job. 
+ my $record_job = 0; + if (not $anvil->data->{switches}{Yes}) + { + my $variables = { + server => $server_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0392", variables => $variables}); + $anvil->Job->update_progress({ + progress => 25, + message => "job_0392", + variables => $variables, + }); + + # Ask the user to confirm. + print "\n".$anvil->Words->string({key => "message_0021"})."\n"; + my $answer = <STDIN>; + chomp $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }}); + + if ($answer =~ /^y/i) + { + print $anvil->Words->string({key => "message_0175"})."\n"; + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + else + { + print $anvil->Words->string({key => "message_0022"})."\n"; + $anvil->nice_exit({exit_code => 0}); + } + } + elsif (not $anvil->data->{switches}{'job-uuid'}) + { + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + + if ($record_job) + { + my $job_data = "server=".$anvil->data->{switches}{server}."\n"; + $job_data .= "disconnect=1\n"; + + # Register the job with the DR host. + my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ + debug => 2, + job_command => $anvil->data->{path}{exe}{'anvil-manage-dr'}.$anvil->Log->switches, + job_data => $job_data, + job_name => "server::dr", + job_title => "job_0384", + job_description => "job_0385", + job_progress => 0, + job_host_uuid => $dr1_host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); + + # Report the job UUID. + print $anvil->Words->string({key => "job_0383", variables => { job_uuid => $job_uuid }})."\n"; + + $anvil->nice_exit({exit_code => 0}); + } + + # If the resource is up, take it down. 
+ my $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0393", variables => $variables}); + $anvil->Job->update_progress({ + progress => 50, + message => "job_0393", + variables => $variables, + }); + + # If the resource is up locally ('drbdsetup status' succeeds), take it down with 'drbdadm down'. Only + # the local resource is brought down; the peers are not touched here. + my $drbd_down_call = $anvil->data->{path}{exe}{drbdsetup}." status ".$server_name." && ".$anvil->data->{path}{exe}{drbdadm}." down ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_down_call => $drbd_down_call }}); + my ($output, $return_code) = $anvil->System->call({shell_call => $drbd_down_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + + # Done! + $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0394", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "job_0394", + variables => $variables, + }); + + return(0); +} + +sub process_connect +{ + my ($anvil, $terminal) = @_; + + # Parse out the DRBD resource's backing the server and get their LV sizes. 
+ $anvil->Database->get_server_definitions(); + my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); + my $anvil_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name}; + my $node1_short_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{short_host_name}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name}; + my $node2_short_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{short_host_name}; + my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name}; + my $dr1_short_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{short_host_name}; + my $server_name = $anvil->data->{server}{'server-name'}; + my $server_uuid = $anvil->data->{server}{'server-uuid'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + anvil_uuid => $anvil_uuid, + anvil_password => $anvil->Log->is_secure($anvil_password), + node1_host_uuid => $node1_host_uuid, + node1_host_name => $node1_host_name, + node1_short_host_name => $node1_short_host_name, + node2_host_uuid => $node2_host_uuid, + node2_host_name => $node2_host_name, + node2_short_host_name => $node2_short_host_name, + dr1_host_uuid => $dr1_host_uuid, + dr1_host_name => $dr1_host_name, + dr1_short_host_name => $dr1_short_host_name, + server_name => $server_name, + server_uuid => $server_uuid, + }}); + + ### NOTE: 'Yes' is set when a job is picked up, so this won't re-register the job. 
+ my $record_job = 0; + if (not $anvil->data->{switches}{Yes}) + { + my $variables = { + server => $server_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0386", variables => $variables}); + $anvil->Job->update_progress({ + progress => 25, + message => "job_0386", + variables => $variables, + }); + + # Ask the user to confirm. + print "\n".$anvil->Words->string({key => "message_0021"})."\n"; + my $answer = <STDIN>; + chomp $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }}); + + if ($answer =~ /^y/i) + { + print $anvil->Words->string({key => "message_0175"})."\n"; + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + else + { + print $anvil->Words->string({key => "message_0022"})."\n"; + $anvil->nice_exit({exit_code => 0}); + } + } + elsif (not $anvil->data->{switches}{'job-uuid'}) + { + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + + if ($record_job) + { + my $job_data = "server=".$anvil->data->{switches}{server}."\n"; + $job_data .= "connect=1\n"; + + # Register the job with the DR host. + my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ + debug => 2, + job_command => $anvil->data->{path}{exe}{'anvil-manage-dr'}.$anvil->Log->switches, + job_data => $job_data, + job_name => "server::dr", + job_title => "job_0384", + job_description => "job_0385", + job_progress => 0, + job_host_uuid => $dr1_host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); + + # Report the job UUID. + print $anvil->Words->string({key => "job_0383", variables => { job_uuid => $job_uuid }})."\n"; + + $anvil->nice_exit({exit_code => 0}); + } + + # If the resource is down, bring it up. 
+ my $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0387", variables => $variables}); + $anvil->Job->update_progress({ + progress => 50, + message => "job_0387", + variables => $variables, + }); + + # Bring up the connection locally, and then also bring up the connection on the nodes, in case the + # server is down. + my $drbd_up_call = $anvil->data->{path}{exe}{drbdsetup}." status ".$server_name." || ".$anvil->data->{path}{exe}{drbdadm}." up ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_up_call => $drbd_up_call }}); + my ($output, $return_code) = $anvil->System->call({shell_call => $drbd_up_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0388", variables => $variables}); + $anvil->Job->update_progress({ + progress => 60, + message => "job_0388", + }); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + # "Peer" in this context is either/both nodes + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; + my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + $variables = { host_name => $peer_host_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0389", variables => $variables}); + $anvil->Job->update_progress({ + progress => 70, + message => "job_0389", + variables => $variables, + }); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_sn_ip, + password => $anvil_password, + shell_call => $drbd_up_call, + }); + $anvil->Log->variables({source => $THIS_FILE, 
line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + + # Now watch until we connect to at least one peer. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0390"}); + $anvil->Job->update_progress({ + progress => 80, + message => "job_0390", + }); + my $waiting = 1; + while($waiting) + { + sleep 5; + $anvil->DRBD->gather_data({debug => 2}); + + $waiting = 0; + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{volume}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volume => $volume }}); + my $volume_up = 0; + foreach my $peer (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}}) + { + my $replication_speed = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{replication_speed}; + my $peer_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{peer_role}; + my $peer_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{peer_disk_state}; + my $estimated_time_to_sync = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync}; + my $local_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{local_disk_state}; + my $connection_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{connection_state}; + my $local_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{local_role}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peer => $peer, + replication_speed => $replication_speed, + peer_role => $peer_role, + peer_disk_state => $peer_disk_state, + estimated_time_to_sync => $estimated_time_to_sync, + local_disk_state => $local_disk_state, + connection_state => $connection_state, + 
local_role => $local_role, + }}); + + if ($connection_state eq "established") + { + $volume_up = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volume_up => $volume_up }}); + } + last if $volume_up; + } + + if (not $volume_up) + { + $waiting = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + } + } + + if (not $waiting) + { + # We're ready. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0379"}); + $anvil->Job->update_progress({ + progress => 95, + message => "job_0379", + }); + $waiting = 0; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + } + } + + # Done! + $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0391", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "job_0391", + variables => $variables, + }); return(0); } From ea368a942b5e77a641c724e6bc151c96419e2a1b Mon Sep 17 00:00:00 2001 From: Digimer Date: Tue, 21 Sep 2021 23:51:41 -0400 Subject: [PATCH 3/8] Finished the '--update' switch function in anvil-manage-dr. Signed-off-by: Digimer --- share/words.xml | 14 ++- tools/anvil-manage-dr | 271 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+), 4 deletions(-) diff --git a/share/words.xml b/share/words.xml index 8c20d726..0fd2c4b4 100644 --- a/share/words.xml +++ b/share/words.xml @@ -1270,8 +1270,7 @@ It will take time for it to initialize, please be patient. Do you want to connect the DR host for the server: [#!variable!server!#]? Note: Depending on the disk write load and storage network speed to the DR host, - this could cause reduced disk write performance. - + this could cause reduced disk write performance. 
About to connect the DR resource for the server: [#!variable!server!#]. Brought up the connection locally. Now checking that the resource is up on the nodes. Making sure the resource is up on: [#!variable!host_name!#]. @@ -1279,10 +1278,17 @@ Note: Depending on the disk write load and storage network speed to the DR host, Done! The server: [#!variable!server!#] is now connected. Do you want to disconnect the DR host for the server: [#!variable!server!#]? -Note: Once down, no further changes will be written to the DR host. - +Note: Once down, no further changes will be written to the DR host. About to disconnect the DR resource for the server: [#!variable!server!#]. Done! The server: [#!variable!server!#] is now disconnected. + +Do you want to update the DR host for the server: [#!variable!server!#]? +Note: This will connect the DR host until the disk(s) on DR are (all) UpToDate. + Depending on the disk write load and storage network speed to the DR host, + this could cause reduced disk write performance. + Still sync'ing from: [#!variable!sync_source!#] at a rate of: [#!variable!sync_speed!#/sec]. Estimated time remaining is: [#!variable!time_to_sync!#]. + Sync'ed! Bringing the resource back down now. + Waiting for the connection to come up... Starting: [#!variable!program!#]. diff --git a/tools/anvil-manage-dr b/tools/anvil-manage-dr index a669b3f6..77d51519 100755 --- a/tools/anvil-manage-dr +++ b/tools/anvil-manage-dr @@ -488,6 +488,277 @@ Exiting. { process_disconnect($anvil, $terminal); } + elsif ($anvil->data->{switches}{update}) + { + process_update($anvil, $terminal); + } + + return(0); +} + +sub process_update +{ + my ($anvil, $terminal) = @_; + + # Parse out the DRBD resource's backing the server and get their LV sizes. 
+ $anvil->Database->get_server_definitions(); + my $anvil_uuid = $anvil->Cluster->get_anvil_uuid(); + my $anvil_password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password}; + my $node1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node1_host_uuid}; + my $node1_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{host_name}; + my $node1_short_host_name = $anvil->data->{hosts}{host_uuid}{$node1_host_uuid}{short_host_name}; + my $node2_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_node2_host_uuid}; + my $node2_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{host_name}; + my $node2_short_host_name = $anvil->data->{hosts}{host_uuid}{$node2_host_uuid}{short_host_name}; + my $dr1_host_uuid = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_dr1_host_uuid}; + my $dr1_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{host_name}; + my $dr1_short_host_name = $anvil->data->{hosts}{host_uuid}{$dr1_host_uuid}{short_host_name}; + my $server_name = $anvil->data->{server}{'server-name'}; + my $server_uuid = $anvil->data->{server}{'server-uuid'}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + anvil_uuid => $anvil_uuid, + anvil_password => $anvil->Log->is_secure($anvil_password), + node1_host_uuid => $node1_host_uuid, + node1_host_name => $node1_host_name, + node1_short_host_name => $node1_short_host_name, + node2_host_uuid => $node2_host_uuid, + node2_host_name => $node2_host_name, + node2_short_host_name => $node2_short_host_name, + dr1_host_uuid => $dr1_host_uuid, + dr1_host_name => $dr1_host_name, + dr1_short_host_name => $dr1_short_host_name, + server_name => $server_name, + server_uuid => $server_uuid, + }}); + + ### NOTE: 'Yes' is set when a job is picked up, so this won't re-register the job. 
+ my $record_job = 0; + if (not $anvil->data->{switches}{Yes}) + { + my $variables = { + server => $server_name, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0395", variables => $variables}); + $anvil->Job->update_progress({ + progress => 25, + message => "job_0395", + variables => $variables, + }); + + # Ask the user to confirm. + print "\n".$anvil->Words->string({key => "message_0021"})."\n"; + my $answer = <STDIN>; + chomp $answer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { answer => $answer }}); + + if ($answer =~ /^y/i) + { + print $anvil->Words->string({key => "message_0175"})."\n"; + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + else + { + print $anvil->Words->string({key => "message_0022"})."\n"; + $anvil->nice_exit({exit_code => 0}); + } + } + elsif (not $anvil->data->{switches}{'job-uuid'}) + { + $record_job = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { record_job => $record_job }}); + } + + if ($record_job) + { + my $job_data = "server=".$anvil->data->{switches}{server}."\n"; + $job_data .= "update=1\n"; + + # Register the job with the DR host. + my ($job_uuid) = $anvil->Database->insert_or_update_jobs({ + debug => 2, + job_command => $anvil->data->{path}{exe}{'anvil-manage-dr'}.$anvil->Log->switches, + job_data => $job_data, + job_name => "server::dr", + job_title => "job_0384", + job_description => "job_0385", + job_progress => 0, + job_host_uuid => $dr1_host_uuid, + }); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { job_uuid => $job_uuid }}); + + # Report the job UUID. + print $anvil->Words->string({key => "job_0383", variables => { job_uuid => $job_uuid }})."\n"; + + $anvil->nice_exit({exit_code => 0}); + } + + # If the resource is down, bring it up. 
+ my $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0387", variables => $variables}); + $anvil->Job->update_progress({ + progress => 50, + message => "job_0387", + variables => $variables, + }); + + # Bring up the connection locally, and then also bring up the connection on the nodes, in case the + # server is down. + my $drbd_up_call = $anvil->data->{path}{exe}{drbdsetup}." status ".$server_name." || ".$anvil->data->{path}{exe}{drbdadm}." up ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_up_call => $drbd_up_call }}); + my ($output, $return_code) = $anvil->System->call({shell_call => $drbd_up_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0388", variables => $variables}); + $anvil->Job->update_progress({ + progress => 60, + message => "job_0388", + }); + foreach my $this_host_uuid ($node1_host_uuid, $node2_host_uuid, $dr1_host_uuid) + { + # "Peer" in this context is either/both nodes + next if $this_host_uuid eq $anvil->Get->host_uuid(); + my $peer_host_name = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{short_host_name}; + my $peer_sn_ip = $anvil->data->{hosts}{host_uuid}{$this_host_uuid}{network}{sn1}{ip_address}; + my $variables = { host_name => $peer_host_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0389", variables => $variables}); + $anvil->Job->update_progress({ + progress => 70, + message => "job_0389", + variables => $variables, + }); + my ($output, $error, $return_code) = $anvil->Remote->call({ + target => $peer_sn_ip, + password => $anvil_password, + shell_call => $drbd_up_call, + }); + $anvil->Log->variables({source => 
$THIS_FILE, line => __LINE__, level => 2, list => { + error => $error, + output => $output, + return_code => $return_code, + }}); + } + + # Now watch until our volume(s) are UpToDate + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0390"}); + $anvil->Job->update_progress({ + progress => 70, + message => "job_0390", + }); + + my $waiting = 1; + while ($waiting) + { + $anvil->DRBD->gather_data({debug => 2}); + $waiting = 0; + my $time_to_sync = ""; + my $sync_speed = ""; + my $sync_source = ""; + foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{volume}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { volume => $volume }}); + foreach my $peer (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}}) + { + my $replication_speed = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{replication_speed}; + my $peer_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{peer_role}; + my $peer_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{peer_disk_state}; + my $estimated_time_to_sync = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{estimated_time_to_sync}; + my $local_disk_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{local_disk_state}; + my $connection_state = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{connection_state}; + my $local_role = $anvil->data->{new}{resource}{$server_name}{volume}{$volume}{peer}{$peer}{local_role}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + peer => $peer, + replication_speed => $replication_speed, + peer_role => $peer_role, + peer_disk_state => $peer_disk_state, + estimated_time_to_sync => $estimated_time_to_sync, + local_disk_state => 
$local_disk_state, + connection_state => $connection_state, + local_role => $local_role, + }}); + + if ($connection_state eq "synctarget") + { + $sync_source = $peer; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sync_source => $sync_source }}); + } + if ($replication_speed) + { + $sync_speed = $anvil->Convert->bytes_to_human_readable({'bytes' => $replication_speed}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { sync_speed => $sync_speed }}); + } + if ($estimated_time_to_sync) + { + $time_to_sync = $anvil->Convert->time({'time' => $estimated_time_to_sync, translate => 1}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { time_to_sync => $time_to_sync }}); + } + + if ($local_disk_state ne "uptodate") + { + $waiting = 1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }}); + } + } + } + if ($waiting) + { + if ($sync_source) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + sync_source => $sync_source, + sync_speed => $sync_speed, + time_to_sync => $time_to_sync, + }}); + my $variables = { + sync_source => $sync_source, + sync_speed => $sync_speed, + time_to_sync => $time_to_sync, + }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0396", variables => $variables}); + $anvil->Job->update_progress({ + progress => 80, + message => "job_0396", + variables => $variables, + }); + } + else + { + $anvil->Job->update_progress({ + progress => 75, + message => "job_0398", + variables => $variables, + }); + } + sleep 5; + } + } + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0397", variables => $variables}); + $anvil->Job->update_progress({ + progress => 90, + message => "job_0397", + variables => $variables, + }); + + # Bring up the connection locally, 
and then also bring up the connection on the nodes, in case the + # server is down. + my $drbd_down_call = $anvil->data->{path}{exe}{drbdsetup}." status ".$server_name." && ".$anvil->data->{path}{exe}{drbdadm}." down ".$server_name; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { drbd_down_call => $drbd_down_call }}); + ($output, $return_code) = $anvil->System->call({shell_call => $drbd_down_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + + # Done! + $variables = { server => $server_name }; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0391", variables => $variables}); + $anvil->Job->update_progress({ + progress => 100, + message => "job_0391", + variables => $variables, + }); return(0); } From 0fc394b2944a5df090e0ff02ccf7b99bf8de9338 Mon Sep 17 00:00:00 2001 From: Digimer Date: Sat, 25 Sep 2021 10:01:03 -0400 Subject: [PATCH 4/8] Updated ocf:alteeve:server to see if the target for a migration has a '.mn1' host name, and if so, and if the target can be reached on that address, it will be used for the live migration. This is to allow for inexpensive 10 Gbps live migration speeds. Removed the stub Server->provision method that was never used. Signed-off-by: Digimer --- Anvil/Tools/Server.pm | 35 ++--------------------------------- notes | 2 +- ocf/alteeve/server | 31 ++++++++++++++++++++++++++++++- share/words.xml | 1 + tools/anvil-manage-dr | 17 ++--------------- 5 files changed, 36 insertions(+), 50 deletions(-) diff --git a/Anvil/Tools/Server.pm b/Anvil/Tools/Server.pm index 3bf56862..97d9a189 100644 --- a/Anvil/Tools/Server.pm +++ b/Anvil/Tools/Server.pm @@ -864,37 +864,6 @@ sub map_network return(0); } -=head2 provision - -This method creates a new (virtual) server on an Anvil! system. 
- -Parameters; - -=cut -sub provision -{ - my $self = shift; - my $parameter = shift; - my $anvil = $self->parent; - my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Server->provision()" }}); - -=cut -Provision order: - -1. Create LVs and register the storage. - - NOTE: If the LV is already in the DB (from a past install) and the peer is not available and the local - DRBD resource doesn't show Consistent, abort. If the peer is alive but we can't contact it, it's - possible the peer is UpToDate. -2. Create the DRBD resource. If "Inconsistent" on both nodes, force up to date -3. Wait for install media/image to be ready -4. Provision VM and add to Pacemaker. - -=cut - - - return(0); -} =head2 migrate_virsh @@ -918,9 +887,9 @@ This is the host name (or IP) of the host that we're pulling the server from. If set, the server will be pulled. -=head3 target (optional, defaukt is the full local host name) +=head3 target (optional, default is the full local host name) -This is the host name (or IP) Of the host that the server will be pushed to, if C<< source >> is not set. When this is not passed, the local full host name is used as default. +This is the host name (or IP) of the host that the server will be pushed to, if C<< source >> is not set. When this is not passed, the local full host name is used as default. 
=cut sub migrate_virsh diff --git a/notes b/notes index 604d9fc3..5af42d87 100644 --- a/notes +++ b/notes @@ -6,7 +6,7 @@ TODO: - host_health is a duplicate of 'health' ============ - + # Dump su - postgres -c "pg_dump anvil > /var/lib/pgsql/anvil.out" su - postgres -c "pg_dump --schema-only anvil > /var/lib/pgsql/anvil_schema.out" diff --git a/ocf/alteeve/server b/ocf/alteeve/server index 917df7bb..7f63d952 100755 --- a/ocf/alteeve/server +++ b/ocf/alteeve/server @@ -1260,6 +1260,35 @@ sub migrate_server target_host => $target, }}); + # If there is a '.mnX' (migration network X) entry that can be resolved, we'll change the + # target to use that. This is a dedicated, usually back-to-back network used in nodes specifically + # for migration. + my $test_target = $target; + $test_target =~ s/\..*$//; + $test_target .= "mn1"; # Might want to make this a loop to support MN2+ later + my $test_ip = $anvil->Convert->host_name_to_ip({debug => 2, host_name => $test_target}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + test_target => $test_target, + test_ip => $test_ip, + }}); + if ($test_ip) + { + # Can we access the peer with this? + my ($access) = $anvil->Remote->test_access({debug => 3, target => $test_ip}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }}); + + # Did we get access? + if ($access) + { + # Yup! Switch the target. + $target = $test_ip; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "log_0663", variables => { + target => $test_target, + ip => $target, + }}); + } + } + # Before migrating, make sure the daemons are running on the peer. 
check_daemons($anvil, "start"); @@ -1422,7 +1451,7 @@ sub migrate_server debug => 2, server => $server, source => $source, - target => $target + target => $target, }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { migrated => $migrated }}); } diff --git a/share/words.xml b/share/words.xml index 0fd2c4b4..4ec2d18d 100644 --- a/share/words.xml +++ b/share/words.xml @@ -2053,6 +2053,7 @@ The file: [#!variable!file!#] needs to be updated. The difference is: Stopped the postgresql daemon as a peer is currently primary. Our most recent database dump is newer than any from our peers. As such, we'll just start the database without a load. Retrying to connect to the database. + The target can be reached on the dedicated migration network: [#!variable!target!#] via the IP address: [#!variable!ip!#], switching to use that for the RAM copy. The host name: [#!variable!target!#] does not resolve to an IP address. diff --git a/tools/anvil-manage-dr b/tools/anvil-manage-dr index 77d51519..3d6f3143 100755 --- a/tools/anvil-manage-dr +++ b/tools/anvil-manage-dr @@ -5,6 +5,8 @@ # NOTE: Unlike most jobs, this one will directly work on the peer node and the DR host using SSH connections. # This behaviour is likely to change later as it's not ideal. # +# TODO: --remove is not yet implemented, this needs to be done. Use anvil-delete-server for methods to delete. +# # Exit codes; # 0 = Normal exit. # 1 = Any problem that causes an early exit. @@ -85,8 +87,6 @@ if (not $anvil->data->{switches}{'job-uuid'}) sanity_check($anvil, $terminal); -do_task($anvil, $terminal); - $anvil->nice_exit({exit_code => 0}); @@ -95,19 +95,6 @@ $anvil->nice_exit({exit_code => 0}); # Functions # ############################################################################################################# -sub do_task -{ - my ($anvil, $terminal) = @_; - - # What task am I doing? 
- if ($anvil->data->{switches}{protect}) - { - - } - - return(0); -} - sub sanity_check { my ($anvil, $terminal) = @_; From 8e436ffec7fc78a28039c3375ce3fdb76a7dd189 Mon Sep 17 00:00:00 2001 From: Digimer Date: Sat, 25 Sep 2021 17:58:20 -0400 Subject: [PATCH 5/8] WIP: Started work on a new Storage->copy_device() method that will do 'dd' calls. Fixed a bug in System->update_hosts() that was causing hosts to be constantly rewritten. (Well, I hope fixed, this has been a notoriously buggy part of the program...) Signed-off-by: Digimer --- Anvil/Tools/Storage.pm | 62 ++++++++++++++++++++++++++++++++++++ Anvil/Tools/System.pm | 58 ++++++++++++++++++++------------- tools/anvil-provision-server | 1 + 3 files changed, 98 insertions(+), 23 deletions(-) diff --git a/Anvil/Tools/Storage.pm b/Anvil/Tools/Storage.pm index 3915aa43..db81542b 100644 --- a/Anvil/Tools/Storage.pm +++ b/Anvil/Tools/Storage.pm @@ -22,6 +22,7 @@ my $THIS_FILE = "Storage.pm"; # check_md5sums # compress # copy_file +# copy_device # delete_file # find # get_file_stats @@ -1263,6 +1264,67 @@ fi"; } +=head3 copy_device + +This uses the C<< dd >> system call, possibly over ssh, to create a copy of the source on the destination. Being based on C<< dd >>, this works with raw block devices and to or from files. + +B<< Warning >>: This must be used carefully! Calling this backwards could destroy data! + +B<< Note >>: The caller is responsible for ensuring the data on the soure will not change during the copy. If the source is a server, make sure it's off. If the source is a file system, make sure it's unmounted. + +Parameters; + +=head3 block_size (optional, default '4M') + +This is the block size to be used for the copy. Specifically, this transtes into 'read bytes, copy, read bytes, copy'. This should match the size of the logical extents, block size or similar where needed. Most LVM logical extents are 4 MiB, so the default of C<< 4M >> should be fine in most cases. 
+ +B<< Note >>: See C<< man dd >> for valid formatting of this option. + +=head3 calculate_sums (Optional, default '0') + +If set to C<< 1 >>, the C<< md5sum >> of the source and destination are calculated and returned. If this is not used, the returned sum fields will be an empty string. + +B<< Note >>: Calculating sums is highly advised, but can increase the time it takes for the copy to complete! + +=head3 destination (required) + +This is the full path to the destination (copy to) file or device. If the source is remote, used the format C<< @target:/path/to/file >>. + +B<< Note >>: Only the source OR the destination can be remote, not both! + +=head3 source (required) + +This is the full path to the source (copy from) file or device. If the source is remote, used the format C<< @target:/path/to/file >>. + +B<< Note >>: Only the source OR the destination can be remote, not both! + +=cut +sub copy_device +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Storage->delete_file()" }}); + + my $file = defined $parameter->{file} ? $parameter->{file} : ""; + my $password = defined $parameter->{password} ? $parameter->{password} : ""; + my $port = defined $parameter->{port} ? $parameter->{port} : 22; + my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root"; + my $target = defined $parameter->{target} ? $parameter->{target} : ""; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + file => $file, + password => $anvil->Log->is_secure($password), + port => $port, + remote_user => $remote_user, + target => $target, + }}); + + + return(""); +} + + =head3 delete_file This deletes a file. Pretty much what it says on the tin. When run locally, it uses C<< unlink >>. 
When run on a remote machine, it uses C<< rm -f >>. As such, this will not delete directories, nor will it delete recursively. diff --git a/Anvil/Tools/System.pm b/Anvil/Tools/System.pm index 8d6709fa..e4483c6c 100644 --- a/Anvil/Tools/System.pm +++ b/Anvil/Tools/System.pm @@ -4902,7 +4902,6 @@ sub update_hosts # Read in the existing hosts file my $add_header = 1; - my $changes = 0; my $added_lo_ipv4 = 0; my $added_lo_ipv6 = 0; my $new_body = ""; @@ -4912,6 +4911,35 @@ sub update_hosts }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_body => $old_body }}); + # Look for duplicate empty lines and clear them. + my $last_line_blank = 0; + my $cleaned_body = ""; + foreach my $line (split/\n/, $old_body) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }}); + if ($line) + { + $last_line_blank = 0; + $cleaned_body .= $line."\n"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { last_line_blank => $last_line_blank }}); + } + elsif (not $last_line_blank) + { + $last_line_blank = 1; + $cleaned_body .= $line."\n"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { last_line_blank => $last_line_blank }}); + } + } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { last_line_blank => $last_line_blank }}); + + my $difference = diff \$old_body, \$cleaned_body, { STYLE => 'Unified' }; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { difference => $difference }}); + if ($difference) + { + $old_body = $cleaned_body; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { old_body => $old_body }}); + } + # This will track the IPs we've seen. We'll only write these out once, and skip any futher entries # that may be found. 
my $written_ips = {}; @@ -4955,9 +4983,6 @@ sub update_hosts { if ($line ne "127.0.0.1\tlocalhost localhost.localdomain localhost4 localhost4.localdomain4") { - $changes = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); - if (not $added_lo_ipv4) { $new_body .= "127.0.0.1\tlocalhost localhost.localdomain localhost4 localhost4.localdomain4\n"; @@ -4977,9 +5002,6 @@ sub update_hosts { if ($line ne "::1\t\tlocalhost localhost.localdomain localhost6 localhost6.localdomain6") { - $changes = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); - if (not $added_lo_ipv6) { $new_body .= "::1\t\tlocalhost localhost.localdomain localhost6 localhost6.localdomain6\n"; @@ -5019,8 +5041,6 @@ sub update_hosts if (exists $written_ips->{$ip_address}) { # Skipping at least one line, rewrite the file. - $changes = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); next; } $written_ips->{$ip_address} = 1; @@ -5049,8 +5069,6 @@ sub update_hosts new_ip => $ip_address, host => $name, }}); - $changes = 1; - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { changes => $changes }}); next; } } @@ -5092,11 +5110,9 @@ sub update_hosts my $header = $anvil->Words->string({key => "message_0177"}); $header =~ s/^\n//; $new_body = $header.$new_body; - $changes = 1; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - 's1:changes' => $changes, - 's2:header' => $header, - 's3:new_body' => $new_body, + 's1:header' => $header, + 's2:new_body' => $new_body, }}); } @@ -5132,9 +5148,6 @@ sub update_hosts $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { new_line_count => $new_line_count }}); if ($new_line_count) { - $changes = 1; - $anvil->Log->variables({source => $THIS_FILE, 
line => __LINE__, level => $debug, list => { changes => $changes }}); - $new_body .= "\n"; #$new_body .= "\n# ".$anvil->Words->string({key => "message_0178", variables => { date => $anvil->Get->date_and_time({debug => $debug}) }})."\n"; foreach my $ip_address (@{$ip_order}) @@ -5144,11 +5157,10 @@ sub update_hosts } } - $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - 's1:changes' => $changes, - 's2:new_body' => $new_body, - }}); - if ($changes) + $difference = ""; + $difference = diff \$old_body, \$new_body, { STYLE => 'Unified' }; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { difference => $difference }}); + if ($difference) { # Write the new file. $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { new_body => $new_body }}); diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index 0ddee354..bb453791 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -44,6 +44,7 @@ $anvil->data->{switches}{uuid} = ""; $anvil->data->{switches}{ram} = ""; $anvil->data->{switches}{'storage-group'} = ""; $anvil->data->{switches}{'storage-size'} = ""; +$anvil->data->{switches}{'use-image'} = ""; $anvil->Get->switches; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }}); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { From 47c832bc0e17ab18a3e2a014bac886d9842866a7 Mon Sep 17 00:00:00 2001 From: Digimer Date: Thu, 7 Oct 2021 17:10:25 -0400 Subject: [PATCH 6/8] * Updated Network->get_ips() to check for 'permaddr' when processing 'ip addr list' to ensure the permanent MAC is used. * Updated scan-filesystems to set swap usage alerts to notice level only. 
* Updated scan-network to pull the permanent MAC address from an 'ethtool -P ' call to deal with the fact that wireless interfaces don't have their real MAC in the sysfs address file. * Updated anvil-provision-server to set the rtc_tickpolicy to catchup. Signed-off-by: Digimer --- Anvil/Tools/Network.pm | 13 ++++- Anvil/Tools/Storage.pm | 47 ++++++++++++++----- notes | 4 +- .../scan-filesystems/scan-filesystems | 14 +++++- scancore-agents/scan-hardware/scan-hardware | 4 +- scancore-agents/scan-network/scan-network | 41 ++++++++++++---- tools/anvil-provision-server | 2 +- 7 files changed, 95 insertions(+), 30 deletions(-) diff --git a/Anvil/Tools/Network.pm b/Anvil/Tools/Network.pm index 254d98f2..13633844 100644 --- a/Anvil/Tools/Network.pm +++ b/Anvil/Tools/Network.pm @@ -2151,8 +2151,17 @@ fi"; } if ($line =~ /ether (.*?) /i) { - my $mac_address = $1; - $anvil->data->{network}{$host}{interface}{$in_iface}{mac_address} = $mac_address; + my $mac_address = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { mac_address => $mac_address }}); + + # Wireless interfaces have a 'permaddr' that is stable. The MAC address shown by 'ether' changes constantly, for some odd reason. + if ($line =~ /permaddr (.*)$/) + { + $mac_address = $1; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { mac_address => $mac_address }}); + } + + $anvil->data->{network}{$host}{interface}{$in_iface}{mac_address} = $mac_address; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { "network::${host}::interface::${in_iface}::mac_address" => $anvil->data->{network}{$host}{interface}{$in_iface}{mac_address}, }}); diff --git a/Anvil/Tools/Storage.pm b/Anvil/Tools/Storage.pm index db81542b..d29a7fca 100644 --- a/Anvil/Tools/Storage.pm +++ b/Anvil/Tools/Storage.pm @@ -1272,6 +1272,8 @@ B<< Warning >>: This must be used carefully! 
Calling this backwards could destro B<< Note >>: The caller is responsible for ensuring the data on the soure will not change during the copy. If the source is a server, make sure it's off. If the source is a file system, make sure it's unmounted. +B<< Note >>: If the C<< source >> or C<< destination >> is a remote host, passwordless SSH must be configured for this to work! + Parameters; =head3 block_size (optional, default '4M') @@ -1298,6 +1300,10 @@ This is the full path to the source (copy from) file or device. If the source is B<< Note >>: Only the source OR the destination can be remote, not both! +=head3 status_file (required) + +This is the path to the status file used to record the progress of the copy. This will contain a parsed version of the C<< dd ... --status=progress >> output. When the copy is done, if C<< calculate_sums >> is set, then the C<< source= >> and C<< destination= >> will be recorded, marking the completion of the copy. If not set, those same variables will be written without a value, still marking the end of the copy. If there is a problem, the last line of the file be C<< failed= >>. + =cut sub copy_device { @@ -1305,21 +1311,40 @@ sub copy_device my $parameter = shift; my $anvil = $self->parent; my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; - $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Storage->delete_file()" }}); + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Storage->copy_device()" }}); - my $file = defined $parameter->{file} ? $parameter->{file} : ""; - my $password = defined $parameter->{password} ? $parameter->{password} : ""; - my $port = defined $parameter->{port} ? $parameter->{port} : 22; - my $remote_user = defined $parameter->{remote_user} ? $parameter->{remote_user} : "root"; - my $target = defined $parameter->{target} ? 
$parameter->{target} : ""; + my $block_size = defined $parameter->{block_size} ? $parameter->{block_size} : ""; + my $calculate_sums = defined $parameter->{calculate_sums} ? $parameter->{calculate_sums} : ""; + my $destination = defined $parameter->{destination} ? $parameter->{destination} : ""; + my $source = defined $parameter->{source} ? $parameter->{source} : ""; + my $status_file = defined $parameter->{status_file} ? $parameter->{status_file} : ""; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { - file => $file, - password => $anvil->Log->is_secure($password), - port => $port, - remote_user => $remote_user, - target => $target, + block_size => $block_size, + calculate_sums => $calculate_sums, + destination => $destination, + source => $source, + status_file => $status_file, }}); + if (not $source) + { + # No source passed. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Storage->copy_device()", parameter => "source" }}); + return('!!error!!'); + } + if (not $destination) + { + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "err", key => "log_0020", variables => { method => "Storage->copy_device()", parameter => "destination" }}); + return('!!error!!'); + } + if (not $block_size) + { + $block_size = "4M"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { block_size => $block_size }}); + } + + # Verify that the source exists. 
+ return(""); } diff --git a/notes b/notes index 5af42d87..43fac7aa 100644 --- a/notes +++ b/notes @@ -934,7 +934,7 @@ OS10(conf-vlt-1)# discovery-interface ethernet 1/1/25-1/1/26 # Configure the same MAC address to the VLT on both switches: OS10# configure terminal OS10(config)# vlt-domain 1 -OS10(conf-vlt-1)# vlt-mac 00:00:00:00:00:02 +OS10(conf-vlt-1)# vlt-mac 00:00:00:00:00:02 # Set once per VLT domain, not per switch OS10# show vlt 1 mismatch (If no issues, VLT is OK) @@ -958,7 +958,7 @@ OS10(config)# write memory OS10(config)# hostname zo-switch01 zo-switch01(config)# interface vlan 100 zo-switch01(conf-if-vl-100)# description BCN1 -zo-switch01(conf-if-vl-100)# interface range ethernet 1/1/1-1/1/10 +zo-switch01(conf-if-vl-100)# interface range ethernet 1/1/1-1/1/14 zo-switch01(conf-range-eth1/1/1-1/1/10)# switchport access vlan 100 zo-switch01(conf-range-eth1/1/1-1/1/10)# no shutdown zo-switch01(conf-range-eth1/1/1-1/1/10)# exit diff --git a/scancore-agents/scan-filesystems/scan-filesystems b/scancore-agents/scan-filesystems/scan-filesystems index bbf0f2ff..513ecc4f 100755 --- a/scancore-agents/scan-filesystems/scan-filesystems +++ b/scancore-agents/scan-filesystems/scan-filesystems @@ -486,6 +486,11 @@ INSERT INTO if ($changed) { # First time we've fallen under 5% + my $alert_level = "warning"; + if ($new_mount_point eq "") + { + $alert_level = "notice"; + } $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_filesystem_alert_0002", variables => $variables}); $anvil->Alert->register({ alert_level => "warning", @@ -537,10 +542,15 @@ INSERT INTO }}); if (($very_low_changed) or ($low_changed)) { - # Clear the alert + # Clear the alert. If this is swap, make it a notice level alert. + my $alert_level = $very_low_changed ? 
"warning" : "notice"; + if ($new_mount_point eq "") + { + $alert_level = "notice"; + } $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_filesystem_alert_0004", variables => $variables}); $anvil->Alert->register({ - alert_level => $very_low_changed ? "warning" : "notice", + alert_level => $alert_level, clear_alert => 1, message => "scan_filesystem_alert_0004", sort_position => 1, diff --git a/scancore-agents/scan-hardware/scan-hardware b/scancore-agents/scan-hardware/scan-hardware index a2caebb2..aaf3734c 100755 --- a/scancore-agents/scan-hardware/scan-hardware +++ b/scancore-agents/scan-hardware/scan-hardware @@ -952,7 +952,7 @@ sub find_changes swap_percent => $new_swap_percent_used, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "scan_hardware_alert_0020", variables => $variables}); - $anvil->Alert->register({alert_level => "warning", message => "scan_hardware_alert_0020", variables => $variables, set_by => $THIS_FILE }); + $anvil->Alert->register({alert_level => "notice", message => "scan_hardware_alert_0020", variables => $variables, set_by => $THIS_FILE }); } } elsif ($new_scan_hardware_swap_free < $anvil->data->{scancore}{'scan-hardware'}{swap}{clear_threshold}) @@ -976,7 +976,7 @@ sub find_changes swap_percent => $new_swap_percent_used, }; $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 0, level => 1, key => "scan_hardware_alert_0021", variables => $variables}); - $anvil->Alert->register({alert_level => "warning", message => "scan_hardware_alert_0021", variables => $variables, set_by => $THIS_FILE }); + $anvil->Alert->register({alert_level => "notice", message => "scan_hardware_alert_0021", variables => $variables, set_by => $THIS_FILE }); } } } diff --git a/scancore-agents/scan-network/scan-network b/scancore-agents/scan-network/scan-network index a99255e7..c66fc286 100755 --- a/scancore-agents/scan-network/scan-network +++ 
b/scancore-agents/scan-network/scan-network @@ -177,7 +177,6 @@ sub collect_data { # Pull out the data I want. Note that some of these don't exist with virtio-net interfaces. my $interface = $file; - my $mac_address = -e $full_path."/address" ? $anvil->Storage->read_file({file => $full_path."/address"}) : ""; my $link_state = -e $full_path."/carrier" ? $anvil->Storage->read_file({file => $full_path."/carrier"}) : 0; my $mtu = -e $full_path."/mtu" ? $anvil->Storage->read_file({file => $full_path."/mtu"}) : 0; my $duplex = -e $full_path."/duplex" ? $anvil->Storage->read_file({file => $full_path."/duplex"}) : "unknown"; # full or half? @@ -188,14 +187,7 @@ sub collect_data my $tx_bytes = 0; # How many bytes transmitted my $rx_bytes = 0; # How many bytes received - # If the NIC is a bond member, the MAC address could be virtual. - if (-e $full_path."/bonding_slave/perm_hwaddr") - { - $mac_address = $anvil->Storage->read_file({file => $full_path."/bonding_slave/perm_hwaddr"}); - } - # Clean up some newlines. - $mac_address =~ s/\n$//; $link_state =~ s/\n$//; $mtu =~ s/\n$//; $duplex =~ s/\n$//; @@ -203,7 +195,6 @@ sub collect_data $speed =~ s/\n$//; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { interface => $interface, - mac_address => $mac_address, link_state => $link_state, mtu => $mtu, duplex => $duplex, @@ -211,6 +202,36 @@ sub collect_data speed => $speed, }}); + # The MAC address can faked by a number of ways, so we make an explicit call to 'ethtool' to get the permanent mac address. + my $mac_address = ""; + my $shell_call = $anvil->data->{path}{exe}{ethtool}." 
-P ".$interface; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + output => $output, + return_code => $return_code, + }}); + if ($output =~ /(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w)$/) + { + $mac_address = lc($1); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mac_address => $mac_address }}); + } + else + { + # Get it by reading the address file. + if (-e $full_path."/bonding_slave/perm_hwaddr") + { + $mac_address = $anvil->Storage->read_file({file => $full_path."/bonding_slave/perm_hwaddr"}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mac_address => $mac_address }}); + } + elsif (-e $full_path."/address") + { + $mac_address = $anvil->Storage->read_file({file => $full_path."/address"}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mac_address => $mac_address }}); + } + } + # These are variables that will be needed if this is a bond interface. my $ip_address = ""; my $subnet_mask = ""; @@ -402,7 +423,7 @@ sub collect_data } # Find the media, if possible. - my ($ethtool, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{ethtool}." $interface"}); + (my $ethtool, $return_code) = $anvil->System->call({shell_call => $anvil->data->{path}{exe}{ethtool}." $interface"}); foreach my $line (split/\n/, $ethtool) { $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { line => $line }}); diff --git a/tools/anvil-provision-server b/tools/anvil-provision-server index bb453791..54c79d1e 100755 --- a/tools/anvil-provision-server +++ b/tools/anvil-provision-server @@ -488,7 +488,7 @@ sub provision_server $shell_call .= " --network bridge=ifn1_bridge1".$nic_model." 
\\\n"; $shell_call .= " --graphics vnc \\\n"; $shell_call .= " --sound ich9 \\\n"; - $shell_call .= " --clock offset=".$clock_offset." \\\n"; # We may want to support ',rtc_tickpolicy=catchup' + $shell_call .= " --clock offset=".$clock_offset.",rtc_tickpolicy=catchup \\\n"; $shell_call .= " --boot menu=on \\\n"; $shell_call .= " --disk path=/dev/drbd/by-res/".$server."/0".$disk_bus.",driver.io=threads,cache=writeback,driver.discard=unmap,boot.order=1 \\\n"; $shell_call .= " --disk path=".$anvil->data->{job}{install_iso_path}.",device=cdrom,shareable=on,boot.order=2 \\\n"; From a1604344c7db64f268984945cabf72a032e75797 Mon Sep 17 00:00:00 2001 From: Digimer Date: Thu, 7 Oct 2021 22:08:34 -0400 Subject: [PATCH 7/8] * Fixed a bug in Convert->round where numbers with exponents were not handled properly. * Fixed a bug in scan-hardware where the raw bytes free for swap was used to see if the high / cleared thresholds were passed, instead of the percentage as it should have been. * Fixed a bug in scan-network where a new-line wasn't being cleared off the MAC address. Signed-off-by: Digimer --- Anvil/Tools/Convert.pm | 3 +++ scancore-agents/scan-hardware/scan-hardware | 21 ++++++++++++++++++--- scancore-agents/scan-network/scan-network | 3 ++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/Anvil/Tools/Convert.pm b/Anvil/Tools/Convert.pm index f5a619bd..28c50483 100644 --- a/Anvil/Tools/Convert.pm +++ b/Anvil/Tools/Convert.pm @@ -1197,6 +1197,9 @@ sub round # Return if the user passed a double-dash. return('--') if $number eq "--"; + # Take out exponent notation + $number =~ s/e-\d+$//; + # Make a copy of the passed number that I can manipulate.
my $rounded_number = $number; diff --git a/scancore-agents/scan-hardware/scan-hardware b/scancore-agents/scan-hardware/scan-hardware index aaf3734c..9f5ee0d5 100755 --- a/scancore-agents/scan-hardware/scan-hardware +++ b/scancore-agents/scan-hardware/scan-hardware @@ -632,6 +632,11 @@ sub find_changes my $new_scan_hardware_memory_free = $anvil->data->{summary}{ram}{proc}{memory_free}; my $new_scan_hardware_swap_total = $anvil->data->{summary}{ram}{proc}{swap_total}; my $new_scan_hardware_swap_free = $anvil->data->{summary}{ram}{proc}{swap_free}; + my $new_scan_hardware_swap_used = $new_scan_hardware_swap_total - $new_scan_hardware_swap_free; + my $new_swap_used_percentage = $anvil->Convert->round({ + number => (($new_scan_hardware_swap_used / $new_scan_hardware_swap_total) * 100), + places => 0, + }); $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { "new_scan_hardware_cpu_model" => $new_scan_hardware_cpu_model, "new_scan_hardware_cpu_cores" => $new_scan_hardware_cpu_cores, @@ -646,6 +651,8 @@ sub find_changes "new_scan_hardware_memory_free" => $anvil->Convert->add_commas({number => $new_scan_hardware_memory_free})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $new_scan_hardware_memory_free}).")", "new_scan_hardware_swap_total" => $anvil->Convert->add_commas({number => $new_scan_hardware_swap_total})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $new_scan_hardware_swap_total}).")", "new_scan_hardware_swap_free" => $anvil->Convert->add_commas({number => $new_scan_hardware_swap_free})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $new_scan_hardware_swap_free}).")", + "new_scan_hardware_swap_used" => $anvil->Convert->add_commas({number => $new_scan_hardware_swap_used})." (".$anvil->Convert->bytes_to_human_readable({'bytes' => $new_scan_hardware_swap_used}).")", + "new_swap_used_percentage" => $new_swap_used_percentage."%", }}); # The LED status needs to be translated. 
@@ -901,12 +908,16 @@ sub find_changes if ($new_scan_hardware_memory_free ne $old_scan_hardware_memory_free) { # This always changes, so it's an info-level alert - $update = 1; + $update = 1; my $say_new_scan_hardware_memory_free = $anvil->Convert->bytes_to_human_readable({'bytes' => $new_scan_hardware_memory_free})." (".$anvil->Convert->add_commas({number => $new_scan_hardware_memory_free})." #!string!scan_hardware_unit_0001!#)"; my $say_old_scan_hardware_memory_free = $anvil->Convert->bytes_to_human_readable({'bytes' => $old_scan_hardware_memory_free})." (".$anvil->Convert->add_commas({number => $old_scan_hardware_memory_free})." #!string!scan_hardware_unit_0001!#)"; $anvil->Alert->register({set_by => $THIS_FILE, alert_level => "info", message => "scan_hardware_alert_0018,!!new!".$say_new_scan_hardware_memory_free."!!,!!old!".$say_old_scan_hardware_memory_free."!!"}); $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_hardware_alert_0018", variables => { new => $say_new_scan_hardware_memory_free, old => $say_old_scan_hardware_memory_free}}); } + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + new_scan_hardware_swap_free => $new_scan_hardware_swap_free, + old_scan_hardware_swap_free => $old_scan_hardware_swap_free, + }}); if ($new_scan_hardware_swap_free ne $old_scan_hardware_swap_free) { $update = 1; @@ -932,7 +943,11 @@ sub find_changes # Check if swap has gone over the high threshold or dropped below the clear # threashold. 
- if ($new_scan_hardware_swap_free > $anvil->data->{scancore}{'scan-hardware'}{swap}{high_threshold}) + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + new_scan_hardware_swap_free => $new_scan_hardware_swap_free, + "scancore::scan-hardware::swap::high_threshold" => $anvil->data->{scancore}{'scan-hardware'}{swap}{high_threshold}, + }}); + if ($new_swap_used_percentage > $anvil->data->{scancore}{'scan-hardware'}{swap}{high_threshold}) { # It's high my $changed = $anvil->Alert->check_alert_sent({ @@ -955,7 +970,7 @@ sub find_changes $anvil->Alert->register({alert_level => "notice", message => "scan_hardware_alert_0020", variables => $variables, set_by => $THIS_FILE }); } } - elsif ($new_scan_hardware_swap_free < $anvil->data->{scancore}{'scan-hardware'}{swap}{clear_threshold}) + elsif ($new_swap_used_percentage < $anvil->data->{scancore}{'scan-hardware'}{swap}{clear_threshold}) { # It's low my $changed = $anvil->Alert->check_alert_sent({ diff --git a/scancore-agents/scan-network/scan-network b/scancore-agents/scan-network/scan-network index c66fc286..72acd10a 100755 --- a/scancore-agents/scan-network/scan-network +++ b/scancore-agents/scan-network/scan-network @@ -227,7 +227,8 @@ sub collect_data } elsif (-e $full_path."/address") { - $mac_address = $anvil->Storage->read_file({file => $full_path."/address"}); + $mac_address = $anvil->Storage->read_file({file => $full_path."/address"}); + $mac_address =~ s/\n//; $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { mac_address => $mac_address }}); } } From 2557fa454a85b3de21567cb695fee6b65fb59f5d Mon Sep 17 00:00:00 2001 From: Digimer Date: Fri, 8 Oct 2021 00:56:13 -0400 Subject: [PATCH 8/8] * Updated scan-network to delete old TX/RX records. 
Signed-off-by: Digimer --- scancore-agents/scan-network/scan-network | 136 +++++++++++++++++- scancore-agents/scan-network/scan-network.xml | 5 +- 2 files changed, 136 insertions(+), 5 deletions(-) diff --git a/scancore-agents/scan-network/scan-network b/scancore-agents/scan-network/scan-network index 72acd10a..187c3614 100755 --- a/scancore-agents/scan-network/scan-network +++ b/scancore-agents/scan-network/scan-network @@ -82,7 +82,10 @@ find_changes($anvil); # Finally, process health weights. process_health($anvil); - # Shut down. +# This clears the TX and RX variable data for interfaces older than 'scancore::database::age_out'. +clear_old_variables($anvil); + +# Shut down. $anvil->ScanCore->agent_shutdown({agent => $THIS_FILE}); @@ -90,6 +93,136 @@ $anvil->ScanCore->agent_shutdown({agent => $THIS_FILE}); # Functions # ############################################################################################################# +# This clears the TX and RX variable data for interfaces older than 'scancore::database::age_out'. +sub clear_old_variables +{ + my ($anvil) = @_; + + # Only Strikers run this. + my $host_type = $anvil->Get->host_type(); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }}); + if ($host_type ne "striker") + { + return(0); + } + + # Read in all interfaces and for each, delete historical records over the age-out time. + my $age = $anvil->data->{scancore}{database}{age_out}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { age => $age }}); + + if ($age =~ /\D/) + { + # Age is not valid, set it to defaults. + $age = 24; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { age => $age }}); + } + + # Get the timestamp to delete TX and RX variable records older than $age hours.
+ my $query = "SELECT now() - '".$age."h'::interval;"; + my $old_timestamp = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__})->[0]->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + query => $query, + old_timestamp => $old_timestamp, + }}); + + # Read in all interface RX and TX variables. + foreach my $uuid (keys %{$anvil->data->{cache}{database_handle}}) + { + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + uuid => $uuid, + db_host => $anvil->Get->host_name_from_uuid({host_uuid => $uuid}), + }}); + my $queries = []; + $query = " +SELECT + variable_uuid, + variable_name +FROM + variables +WHERE + variable_name LIKE '%::tx_bytes' +OR + variable_name LIKE '%::rx_bytes' +;"; + my $results = $anvil->Database->query({query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + foreach my $row (@{$results}) + { + my $variable_uuid = $row->[0]; + my $variable_name = $row->[1]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + 's1:variable_name' => $variable_name, + 's2:variable_uuid' => $variable_uuid, + }}); + + # Find out of there are any records to remove at all. + my $query = "SELECT history_id FROM history.variables WHERE variable_uuid = ".$anvil->Database->quote($variable_uuid)." 
AND modified_date <= '".$old_timestamp."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + + my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + + if ($count) + { + # Find how many records will be left. If it's 0, we'll use an OFFSET 1. + my $query = "SELECT history_id FROM history.variables WHERE variable_uuid = ".$anvil->Database->quote($variable_uuid)." AND modified_date > '".$old_timestamp."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + + my $results = $anvil->Database->query({uuid => $uuid, query => $query, source => $THIS_FILE, line => __LINE__}); + my $count = @{$results}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { + results => $results, + count => $count, + }}); + if ($count) + { + # At least one record will be left, we can do a simple delete. + my $query = "DELETE FROM history.variables WHERE variable_uuid = ".$anvil->Database->quote($variable_uuid)." AND modified_date <= '".$old_timestamp."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + push @{$queries}, $query; + } + else + { + # This would delete everything, reserve at least one record. + foreach my $row (@{$results}) + { + my $history_id = $row->[0]; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { history_id => $history_id }}); + + my $query = "DELETE FROM history.variables WHERE variable_uuid = ".$anvil->Database->quote($variable_uuid)." 
AND history_id = '".$history_id."';"; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { query => $query }}); + push @{$queries}, $query; + } + } + } + } + + my $commits = @{$queries}; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { commits => $commits }}); + if ($commits) + { + # Commit the DELETEs. + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_network_log_0001", variables => { + age => $age, + records => $commits, + host => $anvil->Get->host_name_from_uuid({host_uuid => $uuid}), + }}); + $anvil->Database->write({debug => 2, uuid => $uuid, query => $queries, source => $THIS_FILE, line => __LINE__}); + undef $queries; + } + } + + return(0); +} + # This reads in all of the network data sub collect_data { @@ -3178,6 +3311,7 @@ AND source_name => $source_name, }}); + ### TODO: Don't set / clear interfaces that appear down but aren't named ifn/bcn/sn as they're probably unconfigured/unusued interfaces. my $problem = 0; if ((not $new_link_state) or ($new_operational eq "down") or ($new_duplex ne "full")) { diff --git a/scancore-agents/scan-network/scan-network.xml b/scancore-agents/scan-network/scan-network.xml index dde31e9c..84081118 100644 --- a/scancore-agents/scan-network/scan-network.xml +++ b/scancore-agents/scan-network/scan-network.xml @@ -214,9 +214,6 @@ Prerequisites: This mode is NOT supported by the Anvil! Intelligent Availability™ platform! - - - - + Aging out RX and TX data under: [#!variable!records!#] interfaces. These have 1 or more historical records older than: [#!variable!age!#] hours old from the database host: [#!variable!host!#].