Merge pull request #619 from ClusterLabs/drbd-fencing

Drbd fencing
main
digimer-bot 9 months ago committed by GitHub
commit 584a04b9cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 85
      share/anvil.sql
  2. 190
      tools/fence_pacemaker

@ -1183,91 +1183,6 @@ CREATE TRIGGER trigger_ip_addresses
FOR EACH ROW EXECUTE PROCEDURE history_ip_addresses();
/*
TODO - This will be added only if the existing network tables prove insufficient for our needs.
-- This stores information about network interfaces on hosts. It is mainly used to match a MAC address to a
-- host. Given that it is possible that network devices can move, the linkage to the host_uuid can change.
CREATE TABLE network_manager (
network_manager_uuid uuid not null primary key, -- Unlike most other tables, this UUID comes from nmcli itself, and so this matches what's displayed by nmcli
network_manager_host_uuid uuid not null, -- The host_uuid for this interface
network_manager_device text not null, -- This is the nmcli "device" name
network_manager_name text not null, -- This is the nmcli "name" name
network_manager_mac text not null, -- This is the MAC address of the interface
network_manager_type text not null, -- This is the nmcli "type" string
network_manager_active text not null, -- This is the nmcli "active" field
network_manager_state text not null, -- This is the nmcli "state" field
network_manager_connected numeric not null, -- This is '0' if the connection is down, or a unix timestamp if it's up.
network_manager_mtu numeric not null, -- This is the MTU of the interface
modified_date timestamp with time zone not null,
FOREIGN KEY(network_manager_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE network_manager OWNER TO admin;
CREATE TABLE history.network_manager (
history_id bigserial,
network_manager_uuid uuid not null,
network_manager_host_uuid uuid,
network_manager_mac_address text,
network_manager_name text,
network_manager_speed bigint,
network_manager_mtu bigint,
network_manager_link_state text,
network_manager_operational text,
network_manager_duplex text,
network_manager_medium text,
network_manager_bond_uuid uuid,
network_manager_bridge_uuid uuid,
modified_date timestamp with time zone not null
);
ALTER TABLE history.network_manager OWNER TO admin;
CREATE FUNCTION history_network_manager() RETURNS trigger
AS $$
DECLARE
history_network_manager RECORD;
BEGIN
SELECT INTO history_network_manager * FROM network_manager WHERE network_manager_uuid = new.network_manager_uuid;
INSERT INTO history.network_manager
(network_manager_uuid,
network_manager_host_uuid,
network_manager_mac_address,
network_manager_name,
network_manager_speed,
network_manager_mtu,
network_manager_link_state,
network_manager_operational,
network_manager_duplex,
network_manager_medium,
network_manager_bond_uuid,
network_manager_bridge_uuid,
modified_date)
VALUES
(history_network_manager.network_manager_uuid,
history_network_manager.network_manager_host_uuid,
history_network_manager.network_manager_mac_address,
history_network_manager.network_manager_name,
history_network_manager.network_manager_speed,
history_network_manager.network_manager_mtu,
history_network_manager.network_manager_link_state,
history_network_manager.network_manager_operational,
history_network_manager.network_manager_duplex,
history_network_manager.network_manager_medium,
history_network_manager.network_manager_bond_uuid,
history_network_manager.network_manager_bridge_uuid,
history_network_manager.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_network_manager() OWNER TO admin;
CREATE TRIGGER trigger_network_manager
AFTER INSERT OR UPDATE ON network_manager
FOR EACH ROW EXECUTE PROCEDURE history_network_manager();
*/
-- This stores files made available to Anvil! systems and DR hosts.
CREATE TABLE files (
file_uuid uuid not null primary key,

@ -80,6 +80,7 @@ my $conf = {
drbdadm => "/usr/sbin/drbdadm",
echo => "/usr/bin/echo",
getent => "/usr/bin/getent",
hostnamectl => "/usr/bin/hostnamectl",
logger => "/usr/bin/logger",
pcs => "/usr/sbin/pcs",
},
@ -296,6 +297,10 @@ sub create_constraint
if (lc($peer_rolee) ne "primary")
{
# Set the location constraint so that pacemaker doesn't migrate the server when it
# comes back up.
set_location_constraint($conf);
# We're good, fence is complete.
to_log($conf, {message => "Resource: [".$target_server."] has been fenced via location constraint successfully!", 'line' => __LINE__, level => 1});
@ -331,6 +336,175 @@ sub perform_fence
return(0);
}
# This sets a location constraint so the server (DRBD resource) prefers our node, preventing pacemaker
# from migrating it back when the fenced peer rejoins the cluster.
#
# Parameters: $conf (hashref) - the program's shared config/state hash.
# Returns:    0 on success, 1 if the hostnames could not be determined or the 'pcs' call failed.
sub set_location_constraint
{
	my ($conf) = @_;
	
	# Get the host names as pacemaker knows them.
	my ($local_host, $peer_host) = get_hostname($conf);
	my $server = $conf->{environment}{DRBD_RESOURCE};
	to_log($conf, {message => "server: [".$server."], local_host: [".$local_host."], peer_host: [".$peer_host."]", 'line' => __LINE__, level => 2});
	if ((not $local_host) or (not $peer_host))
	{
		# We can't update the constraints.
		return(1);
	}
	
	# Weight 200 on the local node vs 100 on the peer makes this node preferred without pinning.
	to_log($conf, {message => "Setting the pacemaker location constraint so that: [".$server."] prefers this host.", 'line' => __LINE__, level => 1});
	my $shell_call = $conf->{path}{exe}{pcs}." constraint location ".$server." prefers ".$local_host."=200 ".$peer_host."=100";
	to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
	open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
	while(<$file_handle>)
	{
		# This should not generate output.
		chomp;
		my $line = $_;
		to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
	}
	close($file_handle);
	
	# Closing a piped handle sets '$?' to the child's wait status; a non-zero exit code means 'pcs'
	# failed and the constraint was likely not set. Previously this was silently ignored.
	my $return_code = $? >> 8;
	if ($return_code)
	{
		to_log($conf, {message => "The call: [".$shell_call."] returned the exit code: [".$return_code."]. The location constraint may not have been set!", 'line' => __LINE__, level => 1});
		return(1);
	}
	
	return(0);
}
# This gets the local short hostname and the peer's name, reconciled against the names pacemaker uses
# in the CIB. It tries, in order: 'hostnamectl --static', /etc/hostname, and 'hostnamectl --transient'.
#
# Parameters: $conf (hashref) - the program's shared config/state hash. Reads
#             'cluster::target_node' (assumed to have been set by the caller — TODO confirm) and
#             caches the CIB via read_cib() if not already cached.
# Returns:    ($local_node, $target_node) - the local and peer node names. The local name may be an
#             empty string if every lookup failed.
sub get_hostname
{
	my ($conf) = @_;
	
	# This will store our name.
	$conf->{cluster}{local_node} = "";
	
	my $shell_call = $conf->{path}{exe}{hostnamectl}." --static";
	to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
	open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
	while(<$file_handle>)
	{
		# This should not generate output.
		chomp;
		my $line = $_;
		to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
		if ((not $line) or ($line =~ /\s/))
		{
			# We can't trust this, it could be an error like "Could not get property: Refusing
			# activation, D-Bus is shutting down.".
			last;
		}
		else
		{
			$conf->{cluster}{local_node} = $line;
			to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
			last;
		}
	}
	# Close the piped handle; previously it was left open until the sub returned.
	close $file_handle;
	
	# If we didn't get the host name, try reading /etc/hostname
	if (not $conf->{cluster}{local_node})
	{
		# Try reading the config file name.
		my $shell_call = "/etc/hostname";
		to_log($conf, {message => "Reading: [".$shell_call."]", 'line' => __LINE__, level => 2});
		open (my $file_handle, "<", $shell_call) or warn "Failed to read: [".$shell_call."], error was: [".$!."]";
		while(<$file_handle>)
		{
			### NOTE: Don't chomp '$line' itself, we want to record exactly what we read
			my $line = $_;
			to_log($conf, {message => "line: [".$line."]", 'line' => __LINE__, level => 2});
			
			# Validate a copy with the trailing newline removed. Without this, the newline
			# itself matched '\s' and the file's content was always rejected.
			my $hostname = $line;
			chomp $hostname;
			if ((not $hostname) or ($hostname =~ /\s/))
			{
				# We can't trust this.
				last;
			}
			else
			{
				$conf->{cluster}{local_node} = $hostname;
				to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
				last;
			}
		}
		close $file_handle;
	}
	
	# If we still didn't get the hostname, try calling 'hostnamectl --transient'
	if (not $conf->{cluster}{local_node})
	{
		my $shell_call = $conf->{path}{exe}{hostnamectl}." --transient";
		to_log($conf, {message => "Calling: [".$shell_call."]", 'line' => __LINE__, level => 2});
		open (my $file_handle, $shell_call." 2>&1 |") or die "Failed to call: [".$shell_call."]. The error was: $!\n";
		while(<$file_handle>)
		{
			# This should not generate output.
			chomp;
			my $line = $_;
			to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
			if ((not $line) or ($line =~ /\s/))
			{
				# We can't trust this, it could be an error like "Could not get property: Refusing
				# activation, D-Bus is shutting down.".
				last;
			}
			else
			{
				$conf->{cluster}{local_node} = $line;
				to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
				last;
			}
		}
		close $file_handle;
	}
	
	# Make sure we've got a short hostname
	$conf->{cluster}{local_node} =~ s/\..*$//;
	to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
	
	my $peer_host  = $conf->{cluster}{target_node};
	my $local_host = $conf->{cluster}{local_node};
	to_log($conf, {message => "peer_host: [".$peer_host."], local_host: [".$local_host."]", 'line' => __LINE__, level => 2});
	
	# Last, look through the pacemaker CIB to make sure we're going to use the names used in pacemaker.
	if ((not exists $conf->{cluster}{cib}) or (not $conf->{cluster}{cib}))
	{
		read_cib($conf);
	}
	if ($conf->{cluster}{cib})
	{
		foreach my $line (split/\n/, $conf->{cluster}{cib})
		{
			to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
			if ($line =~ /<node .*>$/)
			{
				my $this_node_name = ($line =~ /uname="(.*?)"/)[0];
				to_log($conf, {message => "this_node_name: [".$this_node_name."]", 'line' => __LINE__, level => 2});
				if (($this_node_name eq $local_host) or ($this_node_name eq $peer_host))
				{
					# Name is accurate, we're good
					next;
				}
				# The hostnames are matched literally ('\Q...\E') so that a name containing a
				# regex metacharacter can't corrupt the match.
				elsif ($this_node_name =~ /^\Q$local_host\E\./)
				{
					# Pacemaker uses the FQDN form of our name; update to match.
					$conf->{cluster}{local_node} = $this_node_name;
					to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."]", 'line' => __LINE__, level => 2});
				}
				elsif ($this_node_name =~ /^\Q$peer_host\E\./)
				{
					# Pacemaker uses the FQDN form of the peer's name; update to match.
					$conf->{cluster}{target_node} = $this_node_name;
					to_log($conf, {message => "cluster::target_node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 2});
				}
			}
		}
	}
	
	to_log($conf, {message => "cluster::local_node: [".$conf->{cluster}{local_node}."], cluster::target_node: [".$conf->{cluster}{target_node}."]", 'line' => __LINE__, level => 2});
	return($conf->{cluster}{local_node}, $conf->{cluster}{target_node});
}
# This reads the status of all resources. If we're not all UpToDate, check if the peer is. If the peer is,
abort. If not, proceed (someone is going to have a bad day, but maybe some servers will live)
@ -537,6 +711,10 @@ sub identify_peer
to_log($conf, {message => "Checking the status of target node: [".$node."].", 'line' => __LINE__, level => 1});
if (($join eq "down") && ($expected eq "down"))
{
# Set the location constraint so that pacemaker doesn't migrate the
# server when it comes back up.
set_location_constraint($conf);
# The node is out.
to_log($conf, {message => "The node: [".$node."] is already down. No actual fence needed.", 'line' => __LINE__, level => 1});
exit(7);
@ -625,6 +803,9 @@ sub read_cib
exit(1);
}
# Cache the CIB.
$conf->{cluster}{cib} = $body;
return($body);
}
@ -738,8 +919,12 @@ sub check_peer_is_fenced
to_log($conf, {message => "Output: [".$line."]", 'line' => __LINE__, level => 2});
}
close $file_handle;
to_log($conf, {message => "Fence completed successfully!", 'line' => __LINE__, level => 1});
# Set the location constraint so that pacemaker doesn't migrate the server
# when it comes back up.
set_location_constraint($conf);
to_log($conf, {message => "Fence completed successfully!", 'line' => __LINE__, level => 1});
exit(7);
}
else
@ -814,7 +999,8 @@ sub to_log
# Build the message. We log the line
if (($conf->{'log'}{line_numbers}) && ($line))
{
$message = $line."; ".$message;
# Record the PID as well to make it easier to separate parallel runs.
$message = "[".$$."]:".$line."; ".$message;
}
my $priority_string = $facility;

Loading…
Cancel
Save