From 495cb90ca6cabce734dcfb43f8f12c9d91a664f5 Mon Sep 17 00:00:00 2001 From: digimer Date: Sat, 24 Feb 2024 17:16:46 -0500 Subject: [PATCH] Created Network->wait_for_network to hold startup for NM to be up. Added the call to Network->wait_for_network to pause scancore and anvil-daemon startups until NetworkManager says it's up and running. Signed-off-by: digimer --- Anvil/Tools.pm | 1 + Anvil/Tools/Network.pm | 56 ++++++++++++++++++++++++++++++++++++++++++ tools/anvil-daemon | 3 ++- tools/scancore | 3 ++- 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/Anvil/Tools.pm b/Anvil/Tools.pm index ac6f685a..08544dca 100644 --- a/Anvil/Tools.pm +++ b/Anvil/Tools.pm @@ -1258,6 +1258,7 @@ sub _set_paths modprobe => "/usr/sbin/modprobe", mv => "/usr/bin/mv", nc => "/usr/bin/nc", + 'nm-online' => "/usr/bin/nm-online", nmap => "/usr/bin/nmap", nmcli => "/bin/nmcli", nohup => "/usr/bin/nohup", diff --git a/Anvil/Tools/Network.pm b/Anvil/Tools/Network.pm index 1c21b87b..e5d14904 100644 --- a/Anvil/Tools/Network.pm +++ b/Anvil/Tools/Network.pm @@ -36,6 +36,7 @@ my $THIS_FILE = "Network.pm"; # read_nmcli # reset_connection # wait_for_bonds +# wait_for_network # _check_firewalld_conf # _get_existing_zone_interfaces # _get_server_ports @@ -4732,6 +4733,61 @@ sub wait_for_bonds return(0); } + +=head2 wait_for_network + +This method calls C<< nm-online --wait-for-startup --timeout X >>, which in turn waits for Network Manager to report C<< startup complete >> in the journald logs. The default timeout used here is C<< 120 >> seconds (as opposed to the default of C<< 30 >> used by C<< nm-online >> itself). + +From our testing, given the complexity of the network in Anvil! clusters, this much time isn't out of the ordinary: + + Feb 24 19:13:17 an-a01n01.ci.alteeve.com NetworkManager[1003]: [1708801997.5155] NetworkManager (version 1.44.0-4.el9_3) is starting... (boot:833ea5be-eb44-4214-9e2d-8c6281dec9b6) + ... 
+ Feb 24 19:14:53 an-a01n01.ci.alteeve.com NetworkManager[1003]: [1708802093.9684] manager: startup complete + +B<< Note >>: This method only works on Network Manager based systems. + +The return code from C<< nm-online >> is returned. See C<< man nm-online >> for details, but the main return codes are C<< 0 >> meaning the connection came up within the timeout, C<< 1 >> if the connection failed to come up within the timeout, and C<< 2 >> if there was any error. + +Parameters; + +=head3 timeout (optional, default '120') + +By default, this method will wait for two minutes. If you want to set a different timeout, set this as a whole number of seconds. If the timeout expires before NetworkManager reports that startup is complete, the C<< nm-online >> return code of C<< 1 >> is returned. If this is set to C<< 0 >>, or to anything that is not a whole number, the default of C<< 120 >> seconds is used instead. + +=cut +sub wait_for_network +{ + my $self = shift; + my $parameter = shift; + my $anvil = $self->parent; + my $debug = defined $parameter->{debug} ? $parameter->{debug} : 3; + $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => $debug, key => "log_0125", variables => { method => "Network->wait_for_network()" }}); + + my $timeout = defined $parameter->{timeout} ? $parameter->{timeout} : 120; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + timeout => $timeout, + }}); + + if ((not $timeout) or ($timeout !~ /^\d+$/)) + { + # Invalid timeout. + $timeout = 120; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { timeout => $timeout }}); + } + + my $shell_call = $anvil->data->{path}{exe}{'nm-online'}." 
--wait-for-startup --timeout ".$timeout; + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { shell_call => $shell_call }}); + + my ($output, $return_code) = $anvil->System->call({debug => $debug, shell_call => $shell_call}); + $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { + 's1:output' => $output, + 's2:return_code' => $return_code, + }}); + + return($return_code); +} + + ############################################################################################################# # Private functions # ############################################################################################################# diff --git a/tools/anvil-daemon b/tools/anvil-daemon index ecea2e92..3a187f54 100755 --- a/tools/anvil-daemon +++ b/tools/anvil-daemon @@ -70,8 +70,9 @@ $anvil->System->_check_anvil_conf(); # If dnf is running, hold. $anvil->System->wait_on_dnf(); -# If we've got bonds, wait for them to be up. +# If we've got bonds, wait for them to be up. Then wait for NetworkManager to be up. $anvil->Network->wait_for_bonds({debug => 2}); +$anvil->Network->wait_for_network({debug => 2}); # Connect to the database(s). If we have no connections, we'll proceed anyway as one of the 'run_once' tasks # is to setup the database server. diff --git a/tools/scancore b/tools/scancore index baf49a33..82743b38 100755 --- a/tools/scancore +++ b/tools/scancore @@ -70,8 +70,9 @@ $anvil->Storage->read_config(); # If dnf is running, hold. $anvil->System->wait_on_dnf(); -# If we've got bonds, wait for them to be up. +# If we've got bonds, wait for them to be up. Then wait for NetworkManager to be up. $anvil->Network->wait_for_bonds({debug => 2}); +$anvil->Network->wait_for_network({debug => 2}); # Read switches $anvil->Get->switches({list => ["purge", "run-once"], man => $THIS_FILE});