This is the start of a set of changes needed to rework how we handle DRBD fence requests, so that they create location constraints instead of triggering a full stonith fence.

* In Cluster->parse_cib(), added parsers for node attributes and resource rules. Also stored the existence of and details of each under the server resources for easier referencing.
* Updated scan-server to check for / add DRBD fence rules as needed.

Scancore APC agent bugs;
* For clarity, converted all '#!no_value!#' and '#!no_connection!#' to use '!!' instead in APC scan agents.
* Fixed the setting/clearing of alerts about phases disappearing, so that concurrent logins from different hosts no longer trigger false phase loss alerts.
* Fixed missing variables not being passed to alerts/log entries.

Started more work on anvil-manage-server, but on hold again while the DRBD fencing work is completed.

Signed-off-by: Digimer <digimer@alteeve.ca>
main
Digimer 2 years ago
parent 5d323c3ddf
commit 4ba1982183
  1. 1
      Anvil/Tools.pm
  2. 113
      Anvil/Tools/Cluster.pm
  3. 4
      Anvil/Tools/Convert.pm
  4. 4
      Anvil/Tools/Remote.pm
  5. 12
      Anvil/Tools/ScanCore.pm
  6. 90
      scancore-agents/scan-apc-pdu/scan-apc-pdu
  7. 1
      scancore-agents/scan-apc-pdu/scan-apc-pdu.xml
  8. 2
      scancore-agents/scan-apc-ups/scan-apc-ups
  9. 2
      scancore-agents/scan-cluster/scan-cluster.sql
  10. 72
      scancore-agents/scan-server/scan-server
  11. 3
      scancore-agents/scan-server/scan-server.xml
  12. 2
      share/words.xml
  13. 1
      tools/anvil-boot-server
  14. 19
      tools/anvil-manage-server
  15. 6
      tools/anvil-provision-server
  16. 26
      tools/fence_pacemaker

@ -1162,6 +1162,7 @@ sub _set_paths
createdb => "/usr/bin/createdb",
createrepo_c => "/usr/bin/createrepo_c",
createuser => "/usr/bin/createuser",
crm_attribute => "/usr/sbin/crm_attribute",
crm_error => "/usr/sbin/crm_error",
crm_resource => "/usr/sbin/crm_resource",
crm_mon => "/usr/sbin/crm_mon",

@ -646,6 +646,8 @@ sub boot_server
}
}
### TODO: If we don't have a node, pick the node with the most VMs already running (by total RAM
### count)
if ($node)
{
$anvil->Cluster->_set_server_constraint({
@ -654,6 +656,8 @@ sub boot_server
});
}
### TODO: Make sure that the drbd fence rule exists in pacemaker and add it, if not.
# Now boot the server.
my ($output, $return_code) = $anvil->System->call({debug => 3, shell_call => $anvil->data->{path}{exe}{pcs}." resource enable ".$server});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
@ -3076,6 +3080,7 @@ sub parse_cib
foreach my $node ($dom->findnodes('/cib/configuration/nodes/node'))
{
my $node_id = $node->{id};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { node_id => $node_id }});
foreach my $variable (sort {$a cmp $b} keys %{$node})
{
next if $variable eq "id";
@ -3108,6 +3113,7 @@ sub parse_cib
foreach my $instance_attributes ($node->findnodes('./instance_attributes'))
{
my $instance_attributes_id = $instance_attributes->{id};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { instance_attributes_id => $instance_attributes_id }});
foreach my $nvpair ($instance_attributes->findnodes('./nvpair'))
{
my $id = $nvpair->{id};
@ -3177,14 +3183,37 @@ sub parse_cib
foreach my $constraint ($dom->findnodes('/cib/configuration/constraints/rsc_location'))
{
my $id = $constraint->{id};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{node} = $constraint->{node};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{node} = $constraint->{node} ? $constraint->{node} : "";
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{resource} = $constraint->{rsc};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{score} = $constraint->{score};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{score} = $constraint->{score} ? $constraint->{score} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::constraints::location::${id}::node" => $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{node},
"cib::parsed::configuration::constraints::location::${id}::resource" => $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{resource},
"cib::parsed::configuration::constraints::location::${id}::score" => $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{score},
}});
# If there's no 'node', this is probably a drbd fence constraint.
if (not $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{node})
{
foreach my $rule_id ($constraint->findnodes('./rule'))
{
my $constraint_id = $rule_id->{id};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{score} = $rule_id->{score};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::constraints::location::${id}::constraint::${constraint_id}::score" => $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{score},
}});
foreach my $expression_id ($rule_id->findnodes('./expression'))
{
my $attribute = $expression_id->{attribute};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}{$attribute}{operation} = $expression_id->{operation};
$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}{$attribute}{value} = $expression_id->{value};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::configuration::constraints::location::${id}::constraint::${constraint_id}::attribute::${attribute}::operation" => $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}{$attribute}{operation},
"cib::parsed::configuration::constraints::location::${id}::constraint::${constraint_id}::attribute::${attribute}::value" => $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}{$attribute}{value},
}});
}
}
}
}
foreach my $node_state ($dom->findnodes('/cib/status/node_state'))
{
@ -3526,17 +3555,17 @@ sub parse_cib
foreach my $lrm_resource_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}})
{
my $lrm_resource_operations_count = keys %{$anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { lrm_resource_operations_count => $lrm_resource_operations_count }});
foreach my $lrm_rsc_op_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}})
{
my $type = $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{type};
my $class = $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{class};
my $operation = $anvil->data->{cib}{parsed}{cib}{status}{node_state}{$id}{lrm_id}{$lrm_id}{lrm_resource}{$lrm_resource_id}{lrm_rsc_op_id}{$lrm_rsc_op_id}{operation};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
lrm_resource_operations_count => $lrm_resource_operations_count,
type => $type,
class => $class,
operation => $operation,
lrm_rsc_op_id => $lrm_rsc_op_id,
's1:lrm_rsc_op_id' => $lrm_rsc_op_id,
's2:type' => $type,
's3:class' => $class,
's4:operation' => $operation,
}});
# Skip unless it's a server.
@ -3566,6 +3595,76 @@ sub parse_cib
"cib::parsed::data::server::${lrm_resource_id}::role" => $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{role},
}});
}
# Do we have a DRBD fence rule?
$anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{'exists'} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"cib::parsed::data::server::${lrm_resource_id}::drbd_fence_rule::exists" => $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{'exists'},
}});
foreach my $id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{configuration}{constraints}{location}})
{
my $node = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{node};
my $resource = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{resource};
my $score = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{score};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"s1:id" => $id,
"s2:node" => $node,
"s3:resource" => $resource,
"s4:score" => $score,
}});
# Is this the server?
next if $resource ne $lrm_resource_id;
next if not exists $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint};
foreach my $constraint_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}})
{
my $score = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{score};
foreach my $attribute (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}})
{
my $operation = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}{$attribute}{operation};
my $value = $anvil->data->{cib}{parsed}{configuration}{constraints}{location}{$id}{constraint}{$constraint_id}{attribute}{$attribute}{value};
my $test_key = "location-".$resource."-rule";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
's1:constraint_id' => $constraint_id,
's2:score' => $score,
's3:attribute' => $attribute,
's4:operation' => $operation,
's5:value' => $value,
's6:test_key' => $test_key,
}});
if ($constraint_id eq $test_key)
{
$anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{'exists'} = 1;
$anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{attribute} = $attribute;
$anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{operation} = $operation;
$anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{value} = $value;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"s1:cib::parsed::data::server::${lrm_resource_id}::drbd_fence_rule::exists" => $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{'exists'},
"s2:cib::parsed::data::server::${lrm_resource_id}::drbd_fence_rule::attribute" => $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{attribute},
"s3:cib::parsed::data::server::${lrm_resource_id}::drbd_fence_rule::operation" => $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{operation},
"s4:cib::parsed::data::server::${lrm_resource_id}::drbd_fence_rule::value" => $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{value},
}});
# Is this referenced by any node attributes?
foreach my $node_id (sort {$a cmp $b} keys %{$anvil->data->{cib}{parsed}{cib}{node_state}})
{
my $node_name = $anvil->data->{cib}{parsed}{configuration}{nodes}{$node_id}{uname};
my $value = defined $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{$attribute} ? $anvil->data->{cib}{parsed}{cib}{node_state}{$node_id}{$attribute} : "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
"s1:node_id" => $node_id,
"s2:node_name" => $node_name,
"s3:value" => $value,
}});
}
}
}
last if $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{'exists'};
}
# Did we find it?
last if $anvil->data->{cib}{parsed}{data}{server}{$lrm_resource_id}{drbd_fence_rule}{'exists'};
}
}
}
}

@ -848,8 +848,8 @@ sub format_mmddyy_to_yymmdd
date => $date,
}});
# Sometimes we're passed '--' or '#!no_value!#' which is not strictly an error, so we'll return it back.
if (($date eq "--") or ($date eq "#!no_value!#"))
# Sometimes we're passed '--' or '!!no_value!!' which is not strictly an error, so we'll return it back.
if (($date eq "--") or ($date eq "!!no_value!!"))
{
return($date);
}

@ -828,14 +828,14 @@ sub read_snmp_oid
output => $output,
return_code => $return_code,
}});
my $value = "#!no_value!#";
my $value = "!!no_value!!";
foreach my $line (split/\n/, $output)
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { line => $line }});
if ($line =~ /No Response/i)
{
$value = "#!no_connection!#";
$value = "!!no_connection!!";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { value => $value }});
}
elsif (($line =~ /STRING: "(.*)"$/i) or ($line =~ /STRING: (.*)$/i))

@ -1843,7 +1843,7 @@ sub post_scan_analysis_node
$anvil->Email->send_alerts();
# Pull the server.
my $shell_call = $anvil->data->{path}{exe}{'anvil-migate-server'}." --target local --server all".$anvil->Log->switches;
my $shell_call = $anvil->data->{path}{exe}{'anvil-migrate-server'}." --target local --server all".$anvil->Log->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0011", variables => { shell_call => $shell_call }});
$anvil->System->call({shell_call => $shell_call});
@ -1967,7 +1967,7 @@ sub post_scan_analysis_node
$anvil->Email->send_alerts();
# Pull the server.
my $shell_call = $anvil->data->{path}{exe}{'anvil-migate-server'}." --target local --server all".$anvil->Log->switches;
my $shell_call = $anvil->data->{path}{exe}{'anvil-migrate-server'}." --target local --server all".$anvil->Log->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0011", variables => { shell_call => $shell_call }});
$anvil->System->call({shell_call => $shell_call});
@ -2122,7 +2122,7 @@ sub post_scan_analysis_node
}
$anvil->Email->send_alerts();
my $shell_call = $anvil->data->{path}{exe}{'anvil-migate-server'}." --target local --server all".$anvil->Log->switches;
my $shell_call = $anvil->data->{path}{exe}{'anvil-migrate-server'}." --target local --server all".$anvil->Log->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0011", variables => { shell_call => $shell_call }});
$anvil->System->call({shell_call => $shell_call});
@ -2185,7 +2185,7 @@ sub post_scan_analysis_node
}
$anvil->Email->send_alerts();
my $shell_call = $anvil->data->{path}{exe}{'anvil-migate-server'}." --target local --server all".$anvil->Log->switches;
my $shell_call = $anvil->data->{path}{exe}{'anvil-migrate-server'}." --target local --server all".$anvil->Log->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0011", variables => { shell_call => $shell_call }});
$anvil->System->call({shell_call => $shell_call});
@ -2220,7 +2220,7 @@ sub post_scan_analysis_node
}
$anvil->Email->send_alerts();
my $shell_call = $anvil->data->{path}{exe}{'anvil-migate-server'}." --target local --server all".$anvil->Log->switches;
my $shell_call = $anvil->data->{path}{exe}{'anvil-migrate-server'}." --target local --server all".$anvil->Log->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0011", variables => { shell_call => $shell_call }});
$anvil->System->call({shell_call => $shell_call});
@ -2272,7 +2272,7 @@ sub post_scan_analysis_node
}
$anvil->Email->send_alerts();
my $shell_call = $anvil->data->{path}{exe}{'anvil-migate-server'}." --target local --server all".$anvil->Log->switches;
my $shell_call = $anvil->data->{path}{exe}{'anvil-migrate-server'}." --target local --server all".$anvil->Log->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0011", variables => { shell_call => $shell_call }});
$anvil->System->call({shell_call => $shell_call});

@ -162,14 +162,6 @@ $anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "
# Read switches
$anvil->Get->switches;
# Too many connections cause the UPS to lag out, so we only run on Strikers.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type ne "striker") && (not $anvil->data->{switches}{force}))
{
$anvil->nice_exit({exit_code => 1});
}
# Handle start-up tasks
my $problem = $anvil->ScanCore->agent_startup({agent => $THIS_FILE});
if ($problem)
@ -177,6 +169,17 @@ if ($problem)
$anvil->nice_exit({exit_code => 1});
}
# The PDUs don't allow multiple connections at the same time. This causes a lot of false alerts when many
# machines try to scan. As such, only Striker dashboards watch APC PDUs.
my $host_type = $anvil->Get->host_type();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
if (($host_type ne "striker") && (not $anvil->data->{switches}{force}))
{
# Exit.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 2, key => "scan_apc_pdu_message_0041", variables => { program => $THIS_FILE }});
$anvil->nice_exit({exit_code => 0});
}
if ($anvil->data->{switches}{purge})
{
# This can be called when doing bulk-database purges.
@ -1182,6 +1185,15 @@ WHERE
new_scan_apc_pdu_outlet_state => $new_scan_apc_pdu_outlet_state,
}});
# If the phase was actually read (it is neither of the placeholder values), reset the 'phase_lost'
# condition timer. Both placeholders must be ruled out together; joining the two 'ne' tests with 'or'
# is true for every possible value, which would reset the timer on every pass and keep the later
# age check from ever reaching its threshold.
if (($new_scan_apc_pdu_outlet_on_phase ne "!!no_connection!!") and ($new_scan_apc_pdu_outlet_on_phase ne "!!no_value!!"))
{
	$anvil->Alert->check_condition_age({clear => 1, name => "scan_apc_pdu::pdu::".$scan_apc_pdu_serial_number."::phase_lost::".$scan_apc_pdu_outlet_number});
}
# Likewise, reset the 'outlet_lost' condition timer only when the outlet state was actually read.
# Resetting it while the state is a placeholder would restart the timer each time the outlet is
# unreadable.
if (($new_scan_apc_pdu_outlet_state ne "!!no_connection!!") and ($new_scan_apc_pdu_outlet_state ne "!!no_value!!"))
{
	$anvil->Alert->check_condition_age({clear => 1, name => "scan_apc_pdu::pdu::".$scan_apc_pdu_serial_number."::outlet_lost::".$scan_apc_pdu_outlet_number});
}
# Do I know about this outlet?
if ($anvil->data->{sql}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu_outlets}{$scan_apc_pdu_outlet_number}{scan_apc_pdu_outlet_uuid})
{
@ -1241,7 +1253,20 @@ WHERE
}
if ($new_scan_apc_pdu_outlet_on_phase ne $old_scan_apc_pdu_outlet_on_phase)
{
# Phase changed, but why tho?
# Phase changed. If the new phase is '!!no_connection!!', it
# could be contention, so check if this has been the case for
# at least five minutes.
if (($new_scan_apc_pdu_outlet_on_phase eq "!!no_connection!!") or ($new_scan_apc_pdu_outlet_on_phase eq "!!no_value!!"))
{
my $age = $anvil->Alert->check_condition_age({name => "scan_apc_pdu::pdu::".$scan_apc_pdu_serial_number."::phase_lost::".$scan_apc_pdu_outlet_number});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { age => $age }});
if ($age < 600)
{
# Ignore it for now.
next;
}
}
$changes = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changes => $changes }});
@ -1262,6 +1287,19 @@ WHERE
}
if ($new_scan_apc_pdu_outlet_state ne $old_scan_apc_pdu_outlet_state)
{
# If we had a contention and the new value is '!!no_connection!!'.
if (($new_scan_apc_pdu_outlet_state eq "!!no_connection!!") or ($new_scan_apc_pdu_outlet_state eq "!!no_value!!"))
{
my $age = $anvil->Alert->check_condition_age({name => "scan_apc_pdu::pdu::".$scan_apc_pdu_serial_number."::outlet_lost::".$scan_apc_pdu_outlet_number});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { age => $age }});
if ($age < 600)
{
# Ignore it for now.
next;
}
}
# This is likely from a fence action, so we make it critical
$changes = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { changes => $changes }});
@ -1658,11 +1696,11 @@ sub clear_phase_low_warning
if ($changed)
{
# Register an alert-cleared event.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0029"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0029", variables => { phase => $scan_apc_pdu_phase_number, name => $pdu_host_name }});
$anvil->Alert->register({
alert_level => "notice",
clear => 1,
message => "scan_apc_pdu_message_0029",
message => "scan_apc_pdu_message_0029,!!phase!".$scan_apc_pdu_phase_number."!!,!!name!".$pdu_host_name."!!",
set_by => $THIS_FILE,
});
}
@ -1683,11 +1721,11 @@ sub set_phase_high_warning
if ($changed)
{
# Register an alert-cleared event.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0027"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0027", variables => { phase => $scan_apc_pdu_phase_number, name => $pdu_host_name }});
$anvil->Alert->register({
alert_level => "notice",
clear => 1,
message => "scan_apc_pdu_message_0027",
message => "scan_apc_pdu_message_0027,!!phase!".$scan_apc_pdu_phase_number."!!,!!name!".$pdu_host_name."!!",
set_by => $THIS_FILE,
});
}
@ -1709,11 +1747,11 @@ sub clear_phase_high_warning
if ($changed)
{
# Register an alert-cleared event.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0028"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0028", variables => { phase => $scan_apc_pdu_phase_number, name => $pdu_host_name }});
$anvil->Alert->register({
alert_level => "notice",
clear => 1,
message => "scan_apc_pdu_message_0028",
message => "scan_apc_pdu_message_0028,!!phase!".$scan_apc_pdu_phase_number."!!,!!name!".$pdu_host_name."!!",
set_by => $THIS_FILE,
});
}
@ -1734,11 +1772,11 @@ sub set_phase_high_critical
if ($changed)
{
# Register an alert-cleared event.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0025"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0025", variables => { phase => $scan_apc_pdu_phase_number, name => $pdu_host_name }});
$anvil->Alert->register({
alert_level => "notice",
clear => 1,
message => "scan_apc_pdu_message_0025",
message => "scan_apc_pdu_message_0025,!!phase!".$scan_apc_pdu_phase_number."!!,!!name!".$pdu_host_name."!!",
set_by => $THIS_FILE,
});
}
@ -1760,11 +1798,11 @@ sub clear_phase_high_critical
if ($changed)
{
# Register an alert-cleared event.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0026"});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_apc_pdu_message_0026", variables => { phase => $scan_apc_pdu_phase_number, name => $pdu_host_name }});
$anvil->Alert->register({
alert_level => "notice",
clear => 1,
message => "scan_apc_pdu_message_0026",
message => "scan_apc_pdu_message_0026,!!phase!".$scan_apc_pdu_phase_number."!!,!!name!".$pdu_host_name."!!",
set_by => $THIS_FILE,
});
}
@ -2088,7 +2126,7 @@ sub gather_pdu_data
"pdu::scan_apc_pdu_uuid::${scan_apc_pdu_uuid}::scan_apc_pdu::scan_apc_pdu_mac_address" => $anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address},
data_type => $data_type,
}});
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address} eq "!!no_value!!")
{
# Some older PDUs use a different OID.
($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address}, $data_type) = $anvil->Remote->read_snmp_oid({
@ -2125,7 +2163,7 @@ sub gather_pdu_data
"pdu::scan_apc_pdu_uuid::${scan_apc_pdu_uuid}::scan_apc_pdu::scan_apc_pdu_mtu_size" => $anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size},
data_type => $data_type,
}});
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size} eq "!!no_value!!")
{
# Some older PDUs use a different OID.
($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size}, $data_type) = $anvil->Remote->read_snmp_oid({
@ -2153,7 +2191,7 @@ sub gather_pdu_data
"pdu::scan_apc_pdu_uuid::${scan_apc_pdu_uuid}::scan_apc_pdu::scan_apc_pdu_link_speed" => $anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed},
data_type => $data_type,
}});
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed} eq "!!no_value!!")
{
# Some older PDUs use a different OID.
($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed}, $data_type) = $anvil->Remote->read_snmp_oid({
@ -2277,7 +2315,7 @@ sub gather_pdu_data
### Convert some unknown values to values we can store in the database
# MAC address
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address} eq "!!no_value!!")
{
$anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mac_address} = "xx:xx:xx:xx:xx:xx";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
@ -2285,7 +2323,7 @@ sub gather_pdu_data
}});
}
# MTU
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size} eq "!!no_value!!")
{
$anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_mtu_size} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
@ -2293,7 +2331,7 @@ sub gather_pdu_data
}});
}
# Link speed
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed} eq "!!no_value!!")
{
$anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu}{scan_apc_pdu_link_speed} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
@ -2301,7 +2339,7 @@ sub gather_pdu_data
}});
}
# Wattage isn't available on older PDUs
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu_variables}{total_wattage_draw} eq "#!no_value!#")
if ($anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu_variables}{total_wattage_draw} eq "!!no_value!!")
{
$anvil->data->{pdu}{scan_apc_pdu_uuid}{$scan_apc_pdu_uuid}{scan_apc_pdu_variables}{total_wattage_draw} = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {

@ -81,6 +81,7 @@ A new PDU: [#!variable!name!#] has been found
<key name="scan_apc_pdu_message_0038">- Phase: [#!variable!phase!#] current amperage draw: [#!variable!amps!#].</key>
<key name="scan_apc_pdu_message_0039">- Outlet: [#!variable!outlet!#], on phase: [#!variable!on_phase!#] is: [#!variable!state!#] (name: [#!variable!name!#]).</key>
<key name="scan_apc_pdu_message_0040">The PDU model: [#!variable!model!#] at the IP address: [#!variable!ip_address!#] has vanished! Did the network cable come unplugged?</key>
<key name="scan_apc_pdu_message_0041">APC PDUs only allow one connection at a time. To avoid contention, only Striker dashboards scan APC PDUs. If you want this to run, you can use '--force'. Exiting.</key>
<!-- Units -->
<key name="scan_apc_pdu_unit_0001">Unknown</key>

@ -2070,7 +2070,7 @@ sub gather_ups_data
my ($anvil) = @_;
### TODO: If the network with the UPS is congested, it is possible that, despite connecting to the
### UPS, some OID reads may fail with '#!no_connection!#'. Try to read them a second time in
### UPS, some OID reads may fail with '!!no_connection!!'. Try to read them a second time in
### these cases. Regardless, be sure to check all returned OID values for 'no connection' and
### handle such cases more gracefully.

@ -127,7 +127,7 @@ CREATE TRIGGER trigger_scan_cluster_nodes
-- TODO: We may want to track this data in the future. For now, we're not going to bother as we can always
-- dig through the historical cib.xml.X files on the nodes.
--
-- -- Constraints; Useful for tracking when servers are asked to migate.
-- -- Constraints; Useful for tracking when servers are asked to migrate.
-- CREATE TABLE scan_cluster_constraints (
-- scan_cluster_constraint_uuid uuid primary key,
-- scan_cluster_constraint_scan_cluster_uuid uuid not null, -- The parent scan_cluster_uuid.

@ -91,6 +91,9 @@ record_migration_times($anvil);
# Check if we need to update the websocket stuff.
check_vnc($anvil);
# Check that there's a DRBD fence rule for each server.
check_drbd_fence_rules($anvil);
# Shut down.
$anvil->ScanCore->agent_shutdown({agent => $THIS_FILE});
@ -99,6 +102,73 @@ $anvil->ScanCore->agent_shutdown({agent => $THIS_FILE});
# Functions #
#############################################################################################################
# Check that there's a DRBD fence rule for each server, and add the rule to pacemaker if it's missing.
#
# Parameters;
# - $anvil: The Anvil::Tools handle used for logging, alerts, system calls and the parsed CIB data.
#
# Returns;
# - Always '0'. This returns early (without changing anything) if this host's type is not 'node', if
#   Cluster->parse_cib() reports a problem, or if 'cib::parsed::local::ready' is not set.
sub check_drbd_fence_rules
{
	my ($anvil) = @_;
	
	# Fence rules are only checked on hosts whose type is 'node'.
	my $host_type = $anvil->Get->host_type();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { host_type => $host_type }});
	if ($host_type ne "node")
	{
		return(0);
	}
	
	# (Re)parse the CIB so that the 'cib::parsed::data::server::<name>::drbd_fence_rule' data is current.
	my $problem = $anvil->Cluster->parse_cib();
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
	if ($problem)
	{
		return(0);
	}
	
	# Don't touch the constraints unless the parsed CIB marks the local node as ready.
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
		"cib::parsed::local::ready" => $anvil->data->{cib}{parsed}{'local'}{ready},
	}});
	if (not $anvil->data->{cib}{parsed}{'local'}{ready})
	{
		return(0);
	}
	
	foreach my $server_name (sort {$a cmp $b} keys %{$anvil->data->{'scan-server'}{server_name}})
	{
		my $drbd_fence_rule_exists    = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{drbd_fence_rule}{'exists'};
		my $drbd_fence_rule_attribute = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{drbd_fence_rule}{attribute};
		my $drbd_fence_rule_operation = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{drbd_fence_rule}{operation};
		my $drbd_fence_rule_value     = $anvil->data->{cib}{parsed}{data}{server}{$server_name}{drbd_fence_rule}{value};
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
			's1:server_name'               => $server_name,
			's2:drbd_fence_rule_exists'    => $drbd_fence_rule_exists,
			's3:drbd_fence_rule_attribute' => $drbd_fence_rule_attribute,
			's4:drbd_fence_rule_operation' => $drbd_fence_rule_operation,
			's5:drbd_fence_rule_value'     => $drbd_fence_rule_value,
		}});
		
		### TODO: Verify that the other values are correct.
		# If it's missing, add it
		if (not $drbd_fence_rule_exists)
		{
			# Create it. Log and register a notice-level alert first so the change is recorded.
			my $variables = {
				server => $server_name,
			};
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 1, key => "scan_server_alert_0019", variables => $variables});
			$anvil->Alert->register({alert_level => "notice", message => "scan_server_alert_0019", variables => $variables, set_by => $THIS_FILE});
			
			# Add a location rule for this server with a score of '-INFINITY' that matches when the
			# 'drbd-fenced_<server>' attribute equals '1'.
			my $shell_call = $anvil->data->{path}{exe}{pcs}." constraint location ".$server_name." rule score=-INFINITY drbd-fenced_".$server_name." eq 1";
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { shell_call => $shell_call }});
			
			my ($output, $return_code) = $anvil->System->call({shell_call => $shell_call, source => $THIS_FILE, line => __LINE__});
			$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { 
				output      => $output,
				return_code => $return_code,
			}});
		}
	}
	
	return(0);
}
#
sub check_vnc
{
@ -755,7 +825,7 @@ DELETED - Marks a server as no longer existing
}
if ($server_host_uuid ne $old_server_host_uuid)
{
# Server migated (to the peer or to a new Anvil!)
# Server migrated (to the peer or to a new Anvil!)
my $variables = {
server => $server_name,
old_host_name => $old_server_host_uuid eq "NULL" ? "NULL" : $anvil->Get->host_name_from_uuid({host_uuid => $old_server_host_uuid}),

@ -110,6 +110,9 @@ The definition for the server: [#!variable!server!#] was changed in the database
#!variable!new_difference!#
======================
</key>
<key name="scan_server_alert_0019">
There was no DRBD fence rule for the server: [#!variable!server!#] in the pacemaker configuration. Adding it now.
</key>
<!-- Log entries -->
<key name="scan_server_log_0001">Starting: [#!variable!program!#].</key>

@ -2634,7 +2634,7 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty
<key name="message_0233">It appears that another instance of 'anvil-safe-start' is already running. Please wait for it to complete (or kill it manually if needed).</key>
<key name="message_0234">Preparing to rename a server.</key>
<key name="message_0235">Preparing to rename stop this node.</key>
<key name="message_0236">This records how long it took to migate a given server. The average of the last five migations is used to guess how long future migrations will take.</key>
<key name="message_0236">This records how long it took to migrate a given server. The average of the last five migrations is used to guess how long future migrations will take.</key>
<key name="message_0237">One or more servers are migrating. While this is the case, ScanCore post-scan checks are not performed.</key>
<key name="message_0238">Preventative live migration has completed.</key>
<key name="message_0239">Preventative live migration has been disabled. We're healthier than our peer, but we will take no action.</key>

@ -9,6 +9,7 @@
#
# TODO:
# - Add support for boot ordering.
# - Check which node we want to put on and set a location constraint to prefer that node before calling pcs.
#
use strict;

@ -123,6 +123,17 @@ sub process_interactive
$anvil->data->{old_config}{ram}{'bytes'} = "";
# Did they specify an Anvil! system?
# If the user didn't specify an Anvil! system, fall back to the Anvil! that this host belongs to (if any).
if (not $anvil->data->{switches}{anvil})
{
	# Is this machine in an Anvil!? Look up the anvil_uuid recorded against our own host UUID
	# (presumably loaded by Database->get_hosts() - confirm against that method).
	$anvil->Database->get_hosts();
	my $host_uuid = $anvil->Get->host_uuid();
	
	# NOTE: This assigns into the existing data hash. 'my' can only declare plain variables, so it
	#       must not be used on a hash element, and the statement needs its terminating semicolon.
	$anvil->data->{switches}{anvil} = $anvil->data->{hosts}{host_uuid}{$host_uuid}{anvil_uuid};
	$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
		host_uuid         => $host_uuid,
		"switches::anvil" => $anvil->data->{switches}{anvil},
	}});
}
if ($anvil->data->{switches}{anvil})
{
$anvil->Get->anvil_from_switch({anvil => $anvil->data->{switches}{anvil}});
@ -142,8 +153,14 @@ sub process_interactive
}});
}
if (not $anvil->data->{target_server}{anvil_uuid})
# If we don't have a server, show the list of servers.
if (not $anvil->data->{switches}{server})
{
# If we've got an Anvil!, show the VMs on it. Otherwise, show all VMs.
if ($anvil->data->{switches}{anvil_name})
{
}
}
return(0);

@ -945,6 +945,12 @@ sub create_md
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "log_0579", variables => { resource => $anvil->data->{job}{server_name} }});
# If we're not the peer, force this resource to Primary.
if (not $anvil->data->{job}{peer_mode})
{
}
$anvil->Job->update_progress({
progress => 50,
message => "job_0191,!!resource!".$anvil->data->{job}{server_name}."!!",

@ -129,6 +129,7 @@ foreach my $i (0..31)
# Log every environment variable we were handed, in sorted order. Variables with an empty value are logged
# at level 3, all others at level 2.
# Example: $conf->{environment}{DRBD_RESOURCE} -> [srv51-Workstation3]
foreach my $variable (sort {$a cmp $b} keys %{$conf->{environment}})
{
	my $value = $conf->{environment}{$variable};
	my $level = 2;
	if ($value eq "")
	{
		$level = 3;
	}
	to_log($conf, {message => "DRBD Environment variable: [".$variable."] -> [".$value."]", 'line' => __LINE__, level => $level});
}
@ -154,8 +155,17 @@ get_drbd_status($conf);
to_log($conf, {message => "Ready to fence: [$conf->{cluster}{target_node}]", 'line' => __LINE__, level => 1});

# Choose how to fence. Without a specific DRBD resource in the environment, the target node itself is
# fenced. With one, a constraint is created to keep that resource off the peer instead.
if (not $conf->{environment}{DRBD_RESOURCE})
{
	# No specific resource, do the deed.
	kill_target($conf);
}
else
{
	# Prevent the resource from running on the peer.
	create_constraint($conf);
}

# If we hit here, something very wrong happened.
# NOTE(review): create_constraint() returns to the caller, so the resource-specific path always reaches
#               this exit(1) - confirm that is the intended exit code for that path.
exit(1);
@ -165,6 +175,20 @@ exit(1);
# Functions #
#############################################################################################################
# This is meant to create a location constraint that prevents the resource / server from running on the
# peer node. As written, it only logs which resource and target node it would act on, then returns '0'.
# 
# Parameters;
#   conf - The program's configuration hash reference. Reads 'environment::DRBD_RESOURCE' and 
#          'cluster::target_node'.
sub create_constraint
{
	my ($conf) = @_;
	
	my $target_node = $conf->{cluster}{target_node};
	my $resource    = $conf->{environment}{DRBD_RESOURCE};
	my $message     = "Will now create a location constraint against: [".$resource."] preventing it from running on: [".$target_node."].";
	to_log($conf, {message => $message, 'line' => __LINE__, level => 1});
	
	return(0);
}
# This reads the status of all resources. If we're not all UpToDate, check if the peer is. If the peer is,
# abort. If not, proceed (someone is going to have a bad day, but maybe some servers will live)
sub get_drbd_status

Loading…
Cancel
Save