diff --git a/share/anvil.sql b/share/anvil.sql
index 11cdf49f..b9bebbec 100644
--- a/share/anvil.sql
+++ b/share/anvil.sql
@@ -1183,91 +1183,6 @@ CREATE TRIGGER trigger_ip_addresses
     FOR EACH ROW EXECUTE PROCEDURE history_ip_addresses();
 
 
-/*
- TODO - This will be added only if we need to use it if the existing network tables aren't sufficient
--- This stores information about network interfaces on hosts. It is mainly used to match a MAC address to a
--- host. Given that it is possible that network devices can move, the linkage to the host_uuid can change.
-CREATE TABLE network_manager (
-    network_manager_uuid         uuid                        not null    primary key,    -- Unlike most other tables, this UUID comes from nmcli itself, and so this matches what's displayed nmcli
-    network_manager_host_uuid    uuid                        not null,                   -- The host_uuid for this interface
-    network_manager_device       text                        not null,                   -- This is the nmcli "device" name
-    network_manager_name         text                        not null,                   -- This is the nmcli "name" name
-    network_manager_mac          text                        not null,                   -- This is the MAC address of the interface
-    network_manager_type         text                        not null,                   -- This is the nmcli "type" string
-    network_manager_active       text                        not null,                   -- This is the nmcli "active" field
-    network_manager_state        text                        not null,                   -- This is the nmcli "state" field
-    network_manager_connected    numeric                     not null,                   -- This is '0' if the connection is down, or a unix timestamp if it's up.
-    network_manager_mtu          numeric                     not null,                   -- This is the MTU of the interface
-    modified_date                timestamp with time zone    not null,
-
-    FOREIGN KEY(network_manager_host_uuid) REFERENCES hosts(host_uuid)
-    );
-ALTER TABLE network_manager OWNER TO admin;
-
-CREATE TABLE history.network_manager (
-    history_id                     bigserial,
-    network_manager_uuid           uuid                        not null,
-    network_manager_host_uuid      uuid,
-    network_manager_mac_address    text,
-    network_manager_name           text,
-    network_manager_speed          bigint,
-    network_manager_mtu            bigint,
-    network_manager_link_state     text,
-    network_manager_operational    text,
-    network_manager_duplex         text,
-    network_manager_medium         text,
-    network_manager_bond_uuid      uuid,
-    network_manager_bridge_uuid    uuid,
-    modified_date                  timestamp with time zone    not null
-);
-ALTER TABLE history.network_manager OWNER TO admin;
-
-CREATE FUNCTION history_network_manager() RETURNS trigger
-AS $$
-DECLARE
-    history_network_manager RECORD;
-BEGIN
-    SELECT INTO history_network_manager * FROM network_manager WHERE network_manager_uuid = new.network_manager_uuid;
-    INSERT INTO history.network_manager
-        (network_manager_uuid,
-         network_manager_host_uuid,
-         network_manager_mac_address,
-         network_manager_name,
-         network_manager_speed,
-         network_manager_mtu,
-         network_manager_link_state,
-         network_manager_operational,
-         network_manager_duplex,
-         network_manager_medium,
-         network_manager_bond_uuid,
-         network_manager_bridge_uuid,
-         modified_date)
-    VALUES
-        (history_network_manager.network_manager_uuid,
-         history_network_manager.network_manager_host_uuid,
-         history_network_manager.network_manager_mac_address,
-         history_network_manager.network_manager_name,
-         history_network_manager.network_manager_speed,
-         history_network_manager.network_manager_mtu,
-         history_network_manager.network_manager_link_state,
-         history_network_manager.network_manager_operational,
-         history_network_manager.network_manager_duplex,
-         history_network_manager.network_manager_medium,
-         history_network_manager.network_manager_bond_uuid,
-         history_network_manager.network_manager_bridge_uuid,
-         history_network_manager.modified_date);
-    RETURN NULL;
-END;
-$$
-LANGUAGE plpgsql;
-ALTER FUNCTION history_network_manager() OWNER TO admin;
-
-CREATE TRIGGER trigger_network_manager
-    AFTER INSERT OR UPDATE ON network_manager
-    FOR EACH ROW EXECUTE PROCEDURE history_network_manager();
-*/
-
-
 -- This stores files made available to Anvil! systems and DR hosts.
 CREATE TABLE files (
     file_uuid    uuid    not null    primary key,
diff --git a/tools/fence_pacemaker b/tools/fence_pacemaker
index 325b4276..9c35bae5 100755
--- a/tools/fence_pacemaker
+++ b/tools/fence_pacemaker
@@ -80,6 +80,7 @@ my $conf = {
 			drbdadm		=>	"/usr/sbin/drbdadm",
 			echo		=>	"/usr/bin/echo",
 			getent		=>	"/usr/bin/getent",
+			hostnamectl	=>	"/usr/bin/hostnamectl",
 			logger		=>	"/usr/bin/logger",
 			pcs		=>	"/usr/sbin/pcs",
 		},
@@ -296,6 +297,10 @@ sub create_constraint
 
 			if (lc($peer_rolee) ne "primary")
 			{
+				# Set the location constraint so that pacemaker doesn't migrate the server when it
+				# comes back up.
+				set_location_constraint($conf);
+
 				# We're good, fence is complete.
 				to_log($conf, {message => "Resource: [".$target_server."] has been fenced via location constraint successfully!", 'line' => __LINE__, level => 1});
 
@@ -331,6 +336,163 @@ sub perform_fence
 
 	return(0);
 }
+# This sets a location constraint so the server prefers our node. It is called after the fence has already
+# succeeded, so problems here are logged and reported via the return value rather than being fatal.
+#
+# Returns '0' if 'pcs' was called, or '1' if the constraint could not be set.
+sub set_location_constraint
+{
+	my ($conf) = @_;
+
+	# Get the host names. Any value that is missing is treated as an empty string so that it is safe to log.
+	my ($local_host, $peer_host) = get_hostname($conf);
+	my $server                   = $conf->{environment}{DRBD_RESOURCE};
+	$local_host = "" if not defined $local_host;
+	$peer_host  = "" if not defined $peer_host;
+	$server     = "" if not defined $server;
+	to_log($conf, {message => "server: [".$server."], local_host: [".$local_host."], peer_host: [".$peer_host."]", 'line' => __LINE__, level => 2});
+
+	if ((not $server) or (not $local_host) or (not $peer_host))
+	{
+		# We can't update the constraints.
+		return(1);
+	}
+
+	to_log($conf, {message => "Setting the pacemaker location constraint so that: [".$server."] prefers this host.", 'line' => __LINE__, level => 1});
+	my $shell_call = $conf->{path}{exe}{pcs}." constraint location ".$server." prefers ".$local_host."=200 ".$peer_host."=100";
+	to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
+
+	# Dying here would stop the caller before it reports the completed fence, so log the problem and
+	# let the caller carry on instead.
+	my $file_handle;
+	if (not open ($file_handle, $shell_call." 2>&1 |"))
+	{
+		to_log($conf, {message => "Failed to call: [".$shell_call."]. The error was: [".$!."]", 'line' => __LINE__, level => 1});
+		return(1);
+	}
+	while(<$file_handle>)
+	{
+		# This should not generate output.
+		chomp;
+		my $line = $_;
+		to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
+	}
+	close($file_handle);
+
+	return(0);
+}
+
+# This reads the first line from a command's output (when 'is_command' is true) or from a file (when it is
+# false) and returns it if it looks like a host name. An empty string is returned if the source could not be
+# read, or if the line is empty or contains white space. The latter usually means we got an error message,
+# like "Could not get property: Refusing activation, D-Bus is shutting down.".
+sub read_hostname_from
+{
+	my ($conf, $source, $is_command) = @_;
+
+	to_log($conf, {message => ($is_command ? "Calling" : "Reading").": [".$source."]", 'line' => __LINE__, level => 2});
+	my $file_handle;
+	my $opened = $is_command ? open ($file_handle, $source." 2>&1 |") : open ($file_handle, "<", $source);
+	if (not $opened)
+	{
+		# This isn't fatal, the caller has other sources to try.
+		to_log($conf, {message => "Failed to read: [".$source."], error was: [".$!."]", 'line' => __LINE__, level => 1});
+		return("");
+	}
+
+	# Only the first line is of interest. The line ending is removed before the white space check,
+	# otherwise the trailing newline would cause every name to be rejected.
+	my $line = <$file_handle>;
+	close($file_handle);
+	return("") if not defined $line;
+	$line =~ s/\r?\n$//;
+	to_log($conf, {message => ($is_command ? "Output" : "line").": [".$line."]", 'line' => __LINE__, level => 2});
+
+	if (($line eq "") or ($line =~ /\s/))
+	{
+		# We can't trust this.
+		return("");
+	}
+
+	return($line);
+}
+
+# This gets the local short host name and then checks the pacemaker CIB so that the names we return are the
+# ones pacemaker uses. The names are stored in 'cluster::local_node' and 'cluster::target_node'.
+#
+# Returns the local node name and the target (peer) node name, in that order.
+sub get_hostname
+{
+	my ($conf) = @_;
+
+	# This will store our name.
+	$conf->{cluster}{local_node} = "";
+
+	# Try the static host name first, then /etc/hostname and finally the transient host name. We stop at the
+	# first source that gives us a usable name.
+	my @sources = (
+		[$conf->{path}{exe}{hostnamectl}." --static", 1],
+		["/etc/hostname", 0],
+		[$conf->{path}{exe}{hostnamectl}." --transient", 1],
+	);
+	foreach my $source (@sources)
+	{
+		$conf->{cluster}{local_node} = read_hostname_from($conf, $source->[0], $source->[1]);
+		to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
+		last if $conf->{cluster}{local_node};
+	}
+
+	# Make sure we've got a short hostname
+	$conf->{cluster}{local_node} =~ s/\..*$//;
+	to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
+
+	my $peer_host  = defined $conf->{cluster}{target_node} ? $conf->{cluster}{target_node} : "";
+	my $local_host = $conf->{cluster}{local_node};
+	to_log($conf, {message => "peer_host: [".$peer_host."], local_host: [".$local_host."]", 'line' => __LINE__, level => 2});
+
+	# Last, look through the pacemaker CIB to make sure we're going to use the names used in pacemaker.
+	# NOTE(review): read_cib() appears to call exit(1) on at least one failure path, which would end the
+	#               fence handler here - confirm that is acceptable for callers that have already fenced.
+	if ((not exists $conf->{cluster}{cib}) or (not $conf->{cluster}{cib}))
+	{
+		read_cib($conf);
+	}
+
+	if ($conf->{cluster}{cib})
+	{
+		foreach my $line (split/\n/, $conf->{cluster}{cib})
+		{
+			to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
+
+			# Only lines with a 'uname' attribute name a node, skip everything else.
+			next if $line !~ /uname="(.*?)"/;
+			my $this_node_name = $1;
+			to_log($conf, {message => "this_node_name: [".$this_node_name."]", 'line' => __LINE__, level => 2});
+
+			if (($this_node_name eq $local_host) or ($this_node_name eq $peer_host))
+			{
+				# Name is accurate, we're good
+				next;
+			}
+			elsif (($local_host ne "") and ($this_node_name =~ /^\Q$local_host\E\./))
+			{
+				# The CIB uses a longer form of our name, so use that.
+				$conf->{cluster}{local_node} = $this_node_name;
+				to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
+			}
+			elsif (($peer_host ne "") and ($this_node_name =~ /^\Q$peer_host\E\./))
+			{
+				# The CIB uses a longer form of the peer's name, so use that.
+				$conf->{cluster}{target_node} = $this_node_name;
+				to_log($conf, {message => "cluster::target_node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 2});
+			}
+		}
+	}
+
+	my $target_node = defined $conf->{cluster}{target_node} ? $conf->{cluster}{target_node} : "";
+	to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."], cluster::target_node: [".$target_node."]", 'line' => __LINE__, level => 2});
+	return($conf->{cluster}{local_node}, $target_node);
+}
 
 # This reads the status of all resources. If we're not all UpToDate, check if the peer is. If the peer is,
 # abort. If not, proceed (someone is gouig to have a bad day, but maybe some servers will live)
@@ -537,6 +699,10 @@ sub identify_peer
 					to_log($conf, {message => "Checking the status of target node: [".$node."].", 'line' => __LINE__, level => 1});
 					if (($join eq "down") && ($expected eq "down"))
 					{
+						# Set the location constraint so that pacemaker doesn't migrate the
+						# server when it comes back up.
+						set_location_constraint($conf);
+
 						# The node is out.
 						to_log($conf, {message => "The node: [".$node."] is already down. No actual fence needed.", 'line' => __LINE__, level => 1});
 						exit(7);
@@ -625,6 +791,9 @@ sub read_cib
 		exit(1);
 	}
 
+	# Cache the CIB.
+	$conf->{cluster}{cib} = $body;
+
 	return($body);
 }
 
@@ -738,8 +907,12 @@ sub check_peer_is_fenced
 					to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
 				}
 				close $file_handle;
-				to_log($conf, {message => "Fence completed successfully!", 'line' => __LINE__, level => 1});
 
+				# Set the location constraint so that pacemaker doesn't migrate the server
+				# when it comes back up.
+				set_location_constraint($conf);
+
+				to_log($conf, {message => "Fence completed successfully!", 'line' => __LINE__, level => 1});
 				exit(7);
 			}
 			else
@@ -814,7 +987,8 @@ sub to_log
 	# Build the message. We log the line
 	if (($conf->{'log'}{line_numbers}) && ($line))
 	{
-		$message = $line."; ".$message;
+		# Record the PID as well to make it easier to separate parallel runs.
+		$message = "[".$$."]:".$line."; ".$message;
 	}
 
 	my $priority_string = $facility;