* Updated DRBD->gather_data() to store data on peers so that each peer's LV path and backing disk are recorded. Also fixed a bug in ->get_status() where the return code for local calls was stored directly under 'drbd::status', instead of under the host's name as it is for remote calls.

* Added the scan-hpacucli scan agent. It's been done for a while and should have been added ages ago.
* Updated anvil-rename-server to the point where it takes down the DRBD resources on all machines, waiting first if a sync is under way. It also verifies, from virsh's perspective, that the server is off on all systems.

Signed-off-by: Digimer <digimer@alteeve.ca>
Branch: main
Author: Digimer, 4 years ago
Parent: 591b550085
Commit: 2e37691116
 Anvil/Tools/DRBD.pm                             |   49
 ocf/alteeve/server                              |    2
 scancore-agents/Makefile.am                     |    7
 scancore-agents/scan-hpacucli/scan-hpacucli     | 5250
 scancore-agents/scan-hpacucli/scan-hpacucli.sql |  572
 scancore-agents/scan-hpacucli/scan-hpacucli.xml |  308
 share/words.xml                                 |   17
 tools/anvil-rename-server                       |  552
 8 files changed

@@ -563,7 +563,7 @@ sub gather_data
 my $this_host_name = $host->{name};
 $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => { this_host_name => $this_host_name }});
-next if (($this_host_name ne $anvil->Get->host_name) && ($this_host_name ne $anvil->Get->short_host_name));
+# Record the details under the hosts
 foreach my $volume_vnr ($host->findnodes('./volume'))
 {
 my $volume = $volume_vnr->{vnr};
@@ -573,15 +573,29 @@ sub gather_data
 's2:meta_disk' => $meta_disk,
 }});
-$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path} = $volume_vnr->findvalue('./device');
-$anvil->data->{new}{resource}{$resource}{volume}{$volume}{backing_disk} = $volume_vnr->findvalue('./disk');
-$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor} = $volume_vnr->findvalue('./device/@minor');
-$anvil->data->{new}{resource}{$resource}{volume}{$volume}{size} = 0;
+$anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_path} = $volume_vnr->findvalue('./device');
+$anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{backing_disk} = $volume_vnr->findvalue('./disk');
+$anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_minor} = $volume_vnr->findvalue('./device/@minor');
+$anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{size} = 0;
 $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
-"s1:new::resource::${resource}::volume::${volume}::device_path" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path},
-"s2:new::resource::${resource}::volume::${volume}::backing_disk" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{backing_disk},
-"s3:new::resource::${resource}::volume::${volume}::device_minor" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor},
+"s1:new::resource::${resource}::host::${this_host_name}::volume::${volume}::device_path" => $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_path},
+"s2:new::resource::${resource}::host::${this_host_name}::volume::${volume}::backing_disk" => $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{backing_disk},
+"s3:new::resource::${resource}::host::${this_host_name}::volume::${volume}::device_minor" => $anvil->data->{new}{resource}{$resource}{host}{$this_host_name}{volume}{$volume}{device_minor},
 }});
+# Record the local data only.
+if (($this_host_name eq $anvil->Get->host_name) or ($this_host_name eq $anvil->Get->short_host_name))
+{
+$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path} = $volume_vnr->findvalue('./device');
+$anvil->data->{new}{resource}{$resource}{volume}{$volume}{backing_disk} = $volume_vnr->findvalue('./disk');
+$anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor} = $volume_vnr->findvalue('./device/@minor');
+$anvil->data->{new}{resource}{$resource}{volume}{$volume}{size} = 0;
+$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
+"s1:new::resource::${resource}::volume::${volume}::device_path" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_path},
+"s2:new::resource::${resource}::volume::${volume}::backing_disk" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{backing_disk},
+"s3:new::resource::${resource}::volume::${volume}::device_minor" => $anvil->data->{new}{resource}{$resource}{volume}{$volume}{device_minor},
+}});
+}
 }
 }
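
With this change, a caller can look up any host's device path and backing disk from the new host-keyed hash, while the 'volume'-level keys keep holding only the local data. A minimal sketch of walking the new layout (the loop variables are illustrative, not from the commit):

# Walk the per-host data recorded by gather_data() above.
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}})
{
	foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$resource}{host}{$host_name}{volume}})
	{
		# The backing disk is the LV under the DRBD device on that host.
		my $device_path  = $anvil->data->{new}{resource}{$resource}{host}{$host_name}{volume}{$volume}{device_path};
		my $backing_disk = $anvil->data->{new}{resource}{$resource}{host}{$host_name}{volume}{$volume}{backing_disk};
		print "Resource: [".$resource."], host: [".$host_name."], volume: [".$volume."] is: [".$device_path."] on: [".$backing_disk."]\n";
	}
}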
@@ -1383,14 +1397,8 @@ sub get_status
 my $host = $anvil->Get->short_host_name();
 if ($anvil->Network->is_local({host => $target}))
 {
-# Clear the hash where we'll store the data.
-if (exists $anvil->data->{drbd}{status}{$host})
-{
-delete $anvil->data->{drbd}{status}{$host};
-}
 # Local.
-($output, $anvil->data->{drbd}{status}{return_code}) = $anvil->System->call({shell_call => $shell_call});
+($output, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->System->call({shell_call => $shell_call});
 $anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => $debug, list => {
 output => $output,
 "drbd::status::${host}::return_code" => $anvil->data->{drbd}{status}{$host}{return_code},
@@ -1398,14 +1406,8 @@ sub get_status
 }
 else
 {
-# Clear the hash where we'll store the data.
-$host = $target;
-if (exists $anvil->data->{drbd}{status}{$host})
-{
-delete $anvil->data->{drbd}{status}{$host};
-}
+# Remote call.
+$host = $target;
 ($output, my $error, $anvil->data->{drbd}{status}{$host}{return_code}) = $anvil->Remote->call({
 debug => $debug,
 shell_call => $shell_call,
@@ -1421,6 +1423,7 @@ sub get_status
 }});
 }
+# Clear the hash where we'll store the data.
 if (exists $anvil->data->{drbd}{status}{$host})
 {
 delete $anvil->data->{drbd}{status}{$host};
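
With the return code now stored under the host's key in both branches, callers can check it the same way for local and remote calls. A rough sketch (assuming get_status() has already been run against $target):

# Resolve the key get_status() used; local calls key on the short host name.
my $host = $anvil->Network->is_local({host => $target}) ? $anvil->Get->short_host_name() : $target;
if ($anvil->data->{drbd}{status}{$host}{return_code})
{
	# A non-zero return code means the status call failed on that host.
	print "Failed to read the DRBD status on: [".$host."]\n";
}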
@@ -1655,6 +1658,8 @@ sub manage_resource
 return(1);
 }
+### TODO: When taking down a resource, check to see if any machine is SyncTarget and take it/them
+###       down first. See anvil-rename-server -> verify_server_is_off() for the logic.
 ### TODO: Sanity check the resource name and task requested.
 my $shell_call = $anvil->data->{path}{exe}{drbdadm}." ".$task." ".$resource;
 my $output = "";
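
A hedged sketch of the SyncTarget check the TODO describes, shaped after 'drbdsetup status --json' style data; the exact hash layout under 'drbd::status' is not shown in this commit, so treat the keys below as assumptions:

# Don't call 'drbdadm down' while any peer device is still a SyncTarget.
my $sync_underway = 0;
foreach my $connection (@{$anvil->data->{drbd}{status}{$host}{connections}})
{
	foreach my $peer_device (@{$connection->{peer_devices}})
	{
		# 'SyncTarget' means a resync is still flowing toward this host.
		$sync_underway = 1 if $peer_device->{'replication-state'} eq "SyncTarget";
	}
}
if ($sync_underway)
{
	# Wait and re-check instead of taking the resource down mid-sync.
	sleep 5;
}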

@@ -734,7 +734,7 @@ sub stop_drbd_resource
 peer => $peer,
 }});
-# Start DRBD locally.
+# Stop the DRBD resource.
 foreach my $resource (sort {$a cmp $b} keys %{$anvil->data->{server}{$local_host}{$server}{resource}})
 {
 my $peer_ip = $anvil->data->{drbd}{config}{$host}{resource}{$resource}{connection}{$peer}{ip_address};
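
The loop resolves each peer's IP so the resource can be taken down on the peers as well as locally. A sketch of how those calls could look, matching the call shapes used elsewhere in this commit (the exact sequence in ocf/alteeve/server is not shown here):

# Take the resource down on the peer over the resolved IP...
my ($output, $error, $return_code) = $anvil->Remote->call({
	target     => $peer_ip,
	shell_call => $anvil->data->{path}{exe}{drbdadm}." down ".$resource,
});

# ...then take it down locally.
($output, my $local_return_code) = $anvil->System->call({
	shell_call => $anvil->data->{path}{exe}{drbdadm}." down ".$resource,
});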

@@ -45,6 +45,13 @@ dist_hardware_DATA = \
 dist_hardware_SCRIPTS = \
 	scan-hardware/scan-hardware
+hpacuclidir = ${targetdir}/scan-hpacucli
+dist_hpacucli_DATA = \
+	scan-hpacucli/scan-hpacucli.xml \
+	scan-hpacucli/scan-hpacucli.sql
+dist_hpacucli_SCRIPTS = \
+	scan-hpacucli/scan-hpacucli
 ipmitooldir = ${targetdir}/scan-ipmitool
 dist_ipmitool_DATA = \
 	scan-ipmitool/scan-ipmitool.sql \

File diff suppressed because it is too large

@@ -0,0 +1,572 @@
-- This is the database schema for the 'hpacucli Scan Agent'.
--
-- Things that change rarely should go in the main tables (even if we won't explicitly watch for them
-- to change with specific alerts).
-- ------------------------------------------------------------------------------------------------------- --
-- Adapter --
-- ------------------------------------------------------------------------------------------------------- --
-- Controller;
-- - Temperature; controller_temperature: [85 °C]
-- - Data; model_name: [Smart Array P420i]
-- - Data; cache_board_present: [True]
-- - Data; controller_status: [OK]
-- - Data; drive_write_cache: [Disabled]
-- - Data; firmware_version: [8.00]
-- - Data; no_battery_write_cache: [Disabled]
--
-- Ignore;
-- - Data; battery_or_capacitor_count: [1]
-- - Data; degraded_performance_optimization: [Disabled]
-- - Data; elevator_sort: [Enabled]
-- - Data; expand_priority: [Medium]
-- - Data; hardware_revision: [B]
-- - Data; inconsistency_repair_policy: [Disabled]
-- - Data; monitor_and_performance_delay: [60 min]
-- - Data; post_prompt_timeout: [0 secs]
-- - Data; queue_depth: [Automatic]
-- - Data; raid_6_-_adg_status: [Enabled]
-- - Data; rebuild_priority: [Medium]
-- - Data; sata_ncq_supported: [True]
-- - Data; spare_activation_mode: [Activate on drive failure]
-- - Data; surface_analysis_inconsistency_notification: [Disabled]
-- - Data; surface_scan_delay: [15 secs]
-- - Data; surface_scan_mode: [Idle]
-- - Data; wait_for_cache_room: [Disabled]
-- - Data; cache_ratio: [10% Read / 90% Write]
-- - Data; total_cache_memory_available: [816 MB]
-- Here is the basic controller information. All connected devices will reference back to this table's
-- 'scan_hpacucli_controller_uuid' column.
CREATE TABLE scan_hpacucli_controllers (
scan_hpacucli_controller_uuid uuid not null primary key,
scan_hpacucli_controller_host_uuid uuid not null,
scan_hpacucli_controller_serial_number text not null, -- This is the core identifier
scan_hpacucli_controller_model text not null, --
scan_hpacucli_controller_status text not null, --
scan_hpacucli_controller_last_diagnostics numeric not null, -- Collecting diagnostics information is very expensive, so we do it once every hour (or whatever the user chooses).
scan_hpacucli_controller_cache_present text not null, -- "yes" or "no"
scan_hpacucli_controller_drive_write_cache text not null, -- "enabled" or "disabled"
scan_hpacucli_controller_firmware_version text not null, --
scan_hpacucli_controller_unsafe_writeback_cache text not null, -- "enabled" or "disabled"
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_hpacucli_controller_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE scan_hpacucli_controllers OWNER TO admin;
CREATE TABLE history.scan_hpacucli_controllers (
history_id bigserial,
scan_hpacucli_controller_uuid uuid,
scan_hpacucli_controller_host_uuid uuid,
scan_hpacucli_controller_serial_number text,
scan_hpacucli_controller_model text,
scan_hpacucli_controller_status text,
scan_hpacucli_controller_last_diagnostics numeric,
scan_hpacucli_controller_cache_present text,
scan_hpacucli_controller_drive_write_cache text,
scan_hpacucli_controller_firmware_version text,
scan_hpacucli_controller_unsafe_writeback_cache text,
modified_date timestamp with time zone
);
ALTER TABLE history.scan_hpacucli_controllers OWNER TO admin;
CREATE FUNCTION history_scan_hpacucli_controllers() RETURNS trigger
AS $$
DECLARE
history_scan_hpacucli_controllers RECORD;
BEGIN
SELECT INTO history_scan_hpacucli_controllers * FROM scan_hpacucli_controllers WHERE scan_hpacucli_controller_uuid=new.scan_hpacucli_controller_uuid;
INSERT INTO history.scan_hpacucli_controllers
(scan_hpacucli_controller_uuid,
scan_hpacucli_controller_host_uuid,
scan_hpacucli_controller_serial_number,
scan_hpacucli_controller_model,
scan_hpacucli_controller_status,
scan_hpacucli_controller_last_diagnostics,
scan_hpacucli_controller_cache_present,
scan_hpacucli_controller_drive_write_cache,
scan_hpacucli_controller_firmware_version,
scan_hpacucli_controller_unsafe_writeback_cache,
modified_date)
VALUES
(history_scan_hpacucli_controllers.scan_hpacucli_controller_uuid,
history_scan_hpacucli_controllers.scan_hpacucli_controller_host_uuid,
history_scan_hpacucli_controllers.scan_hpacucli_controller_serial_number,
history_scan_hpacucli_controllers.scan_hpacucli_controller_model,
history_scan_hpacucli_controllers.scan_hpacucli_controller_status,
history_scan_hpacucli_controllers.scan_hpacucli_controller_last_diagnostics,
history_scan_hpacucli_controllers.scan_hpacucli_controller_cache_present,
history_scan_hpacucli_controllers.scan_hpacucli_controller_drive_write_cache,
history_scan_hpacucli_controllers.scan_hpacucli_controller_firmware_version,
history_scan_hpacucli_controllers.scan_hpacucli_controller_unsafe_writeback_cache,
history_scan_hpacucli_controllers.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_hpacucli_controllers() OWNER TO admin;
CREATE TRIGGER trigger_scan_hpacucli_controllers
AFTER INSERT OR UPDATE ON scan_hpacucli_controllers
FOR EACH ROW EXECUTE PROCEDURE history_scan_hpacucli_controllers();
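
Every table in this schema repeats the pattern above: an AFTER INSERT OR UPDATE trigger copies the new row into a history.* twin, giving a change log for free. A rough illustration of the effect using plain DBI (Anvil normally goes through its Database module; the connection details and serial number here are invented):

use DBI;

# Hypothetical connection to the ScanCore database.
my $dbh = DBI->connect("dbi:Pg:dbname=anvil", "admin", "secret", { RaiseError => 1 });

# One UPDATE against the public table...
$dbh->do("UPDATE scan_hpacucli_controllers SET scan_hpacucli_controller_status = ?, modified_date = now() WHERE scan_hpacucli_controller_serial_number = ?", undef, "OK", "PACCR0M9VZ41S4Q");

# ...automatically leaves one more row in the history schema.
my ($count) = $dbh->selectrow_array("SELECT COUNT(*) FROM history.scan_hpacucli_controllers WHERE scan_hpacucli_controller_serial_number = ?", undef, "PACCR0M9VZ41S4Q");
print "History rows for this controller: [".$count."]\n";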
-- Cache;
-- - Temperature; cache_module_temperature: [37 °C]
-- - Temperature; capacitor_temperature: [25 °C]
-- - Data; cache_serial_number
-- - Data; cache_status: [OK]
-- - Data; battery_or_capacitor_status: [OK]
-- - Data; cache_backup_power_source: [Capacitors]
-- - Data; total_cache_size: [1024 MB]
-- This table is used for BBU and FBU caching.
CREATE TABLE scan_hpacucli_cache_modules (
scan_hpacucli_cache_module_uuid uuid not null primary key,
scan_hpacucli_cache_module_host_uuid uuid not null,
scan_hpacucli_cache_module_controller_uuid uuid not null, -- The controller this module is connected to
scan_hpacucli_cache_module_serial_number text not null,
scan_hpacucli_cache_module_status text not null,
scan_hpacucli_cache_module_type text not null,
scan_hpacucli_cache_module_size numeric not null, -- In bytes
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_hpacucli_cache_module_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(scan_hpacucli_cache_module_controller_uuid) REFERENCES scan_hpacucli_controllers(scan_hpacucli_controller_uuid)
);
ALTER TABLE scan_hpacucli_cache_modules OWNER TO admin;
CREATE TABLE history.scan_hpacucli_cache_modules (
history_id bigserial,
scan_hpacucli_cache_module_uuid uuid,
scan_hpacucli_cache_module_host_uuid uuid,
scan_hpacucli_cache_module_controller_uuid uuid,
scan_hpacucli_cache_module_serial_number text,
scan_hpacucli_cache_module_status text,
scan_hpacucli_cache_module_type text,
scan_hpacucli_cache_module_size numeric,
modified_date timestamp with time zone
);
ALTER TABLE history.scan_hpacucli_cache_modules OWNER TO admin;
CREATE FUNCTION history_scan_hpacucli_cache_modules() RETURNS trigger
AS $$
DECLARE
history_scan_hpacucli_cache_modules RECORD;
BEGIN
SELECT INTO history_scan_hpacucli_cache_modules * FROM scan_hpacucli_cache_modules WHERE scan_hpacucli_cache_module_uuid=new.scan_hpacucli_cache_module_uuid;
INSERT INTO history.scan_hpacucli_cache_modules
(scan_hpacucli_cache_module_uuid,
scan_hpacucli_cache_module_host_uuid,
scan_hpacucli_cache_module_controller_uuid,
scan_hpacucli_cache_module_serial_number,
scan_hpacucli_cache_module_status,
scan_hpacucli_cache_module_type,
scan_hpacucli_cache_module_size,
modified_date)
VALUES
(history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_uuid,
history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_host_uuid,
history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_controller_uuid,
history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_serial_number,
history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_status,
history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_type,
history_scan_hpacucli_cache_modules.scan_hpacucli_cache_module_size,
history_scan_hpacucli_cache_modules.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_hpacucli_cache_modules() OWNER TO admin;
CREATE TRIGGER trigger_scan_hpacucli_cache_modules
AFTER INSERT OR UPDATE ON scan_hpacucli_cache_modules
FOR EACH ROW EXECUTE PROCEDURE history_scan_hpacucli_cache_modules();
-- - Array: [A]
-- - Data; array_type: [Data]
-- - Data; interface_type: [SAS]
-- - Data; status: [OK]
-- - Data; unused_space: [0 MB]
-- NOTE: 'ZZZZ' is a fake array used for unallocated disks
-- This stores information about arrays.
CREATE TABLE scan_hpacucli_arrays (
scan_hpacucli_array_uuid uuid not null primary key,
scan_hpacucli_array_host_uuid uuid not null,
scan_hpacucli_array_controller_uuid uuid not null, -- The controller this array is connected to
scan_hpacucli_array_name text not null,
scan_hpacucli_array_type text not null,
scan_hpacucli_array_status text not null,
scan_hpacucli_array_error_message text not null,
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_hpacucli_array_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(scan_hpacucli_array_controller_uuid) REFERENCES scan_hpacucli_controllers(scan_hpacucli_controller_uuid)
);
ALTER TABLE scan_hpacucli_arrays OWNER TO admin;
CREATE TABLE history.scan_hpacucli_arrays (
history_id bigserial,
scan_hpacucli_array_uuid uuid,
scan_hpacucli_array_host_uuid uuid,
scan_hpacucli_array_controller_uuid uuid,
scan_hpacucli_array_name text,
scan_hpacucli_array_type text,
scan_hpacucli_array_status text,
scan_hpacucli_array_error_message text,
modified_date timestamp with time zone
);
ALTER TABLE history.scan_hpacucli_arrays OWNER TO admin;
CREATE FUNCTION history_scan_hpacucli_arrays() RETURNS trigger
AS $$
DECLARE
history_scan_hpacucli_arrays RECORD;
BEGIN
SELECT INTO history_scan_hpacucli_arrays * FROM scan_hpacucli_arrays WHERE scan_hpacucli_array_uuid=new.scan_hpacucli_array_uuid;
INSERT INTO history.scan_hpacucli_arrays
(scan_hpacucli_array_uuid,
scan_hpacucli_array_host_uuid,
scan_hpacucli_array_controller_uuid,
scan_hpacucli_array_name,
scan_hpacucli_array_type,
scan_hpacucli_array_status,
scan_hpacucli_array_error_message,
modified_date)
VALUES
(history_scan_hpacucli_arrays.scan_hpacucli_array_uuid,
history_scan_hpacucli_arrays.scan_hpacucli_array_host_uuid,
history_scan_hpacucli_arrays.scan_hpacucli_array_controller_uuid,
history_scan_hpacucli_arrays.scan_hpacucli_array_name,
history_scan_hpacucli_arrays.scan_hpacucli_array_type,
history_scan_hpacucli_arrays.scan_hpacucli_array_status,
history_scan_hpacucli_arrays.scan_hpacucli_array_error_message,
history_scan_hpacucli_arrays.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_hpacucli_arrays() OWNER TO admin;
CREATE TRIGGER trigger_scan_hpacucli_arrays
AFTER INSERT OR UPDATE ON scan_hpacucli_arrays
FOR EACH ROW EXECUTE PROCEDURE history_scan_hpacucli_arrays();
-- - Logical Drive: [1]
-- - Data; caching: [Enabled]
-- - Data; cylinders: [65535]
-- - Data; disk_name: [/dev/sda]
-- - Data; drive_type: [Data]
-- - Data; fault_tolerance: [RAID 5]
-- - Data; full_stripe_size: [1280 KB]
-- - Data; heads: [255]
-- - Data; logical_drive_label: [A595BA15001438030E9B24025C4]
-- - Data; mount_points: [/boot 512 MB, / 679.0 GB]
-- - Data; os_status: [LOCKED]
-- - Data; parity_initialization_status: [Initialization Completed]
-- - Data; sectors_per_track: [32]
-- - Data; size: [683.5 GB]
-- - Data; status: [OK]
-- - Data; strip_size: [256 KB]
-- - Data; unique_identifier: [600508B1001C1300C1A2BCEE4BF97677]
-- NOTE: The logical drive '9999' is a fake LD for unallocated disks
-- This stores information about logical drives.
CREATE TABLE scan_hpacucli_logical_drives (
scan_hpacucli_logical_drive_uuid uuid not null primary key,
scan_hpacucli_logical_drive_host_uuid uuid not null,
scan_hpacucli_logical_drive_array_uuid uuid not null, -- The array this logical_drive is connected to
scan_hpacucli_logical_drive_name text not null,
scan_hpacucli_logical_drive_caching text not null,
scan_hpacucli_logical_drive_os_device_name text not null,
scan_hpacucli_logical_drive_type text not null,
scan_hpacucli_logical_drive_raid_level text not null,
scan_hpacucli_logical_drive_size numeric not null, -- in bytes
scan_hpacucli_logical_drive_strip_size numeric not null, -- in bytes
scan_hpacucli_logical_drive_stripe_size numeric not null, -- in bytes
scan_hpacucli_logical_drive_status text not null,
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_hpacucli_logical_drive_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(scan_hpacucli_logical_drive_array_uuid) REFERENCES scan_hpacucli_arrays(scan_hpacucli_array_uuid)
);
ALTER TABLE scan_hpacucli_logical_drives OWNER TO admin;
CREATE TABLE history.scan_hpacucli_logical_drives (
history_id bigserial,
scan_hpacucli_logical_drive_uuid uuid,
scan_hpacucli_logical_drive_host_uuid uuid,
scan_hpacucli_logical_drive_array_uuid uuid,
scan_hpacucli_logical_drive_name text,
scan_hpacucli_logical_drive_caching text,
scan_hpacucli_logical_drive_os_device_name text,
scan_hpacucli_logical_drive_type text,
scan_hpacucli_logical_drive_raid_level text,
scan_hpacucli_logical_drive_size numeric,
scan_hpacucli_logical_drive_strip_size numeric,
scan_hpacucli_logical_drive_stripe_size numeric,
scan_hpacucli_logical_drive_status text,
modified_date timestamp with time zone
);
ALTER TABLE history.scan_hpacucli_logical_drives OWNER TO admin;
CREATE FUNCTION history_scan_hpacucli_logical_drives() RETURNS trigger
AS $$
DECLARE
history_scan_hpacucli_logical_drives RECORD;
BEGIN
SELECT INTO history_scan_hpacucli_logical_drives * FROM scan_hpacucli_logical_drives WHERE scan_hpacucli_logical_drive_uuid=new.scan_hpacucli_logical_drive_uuid;
INSERT INTO history.scan_hpacucli_logical_drives
(scan_hpacucli_logical_drive_uuid,
scan_hpacucli_logical_drive_host_uuid,
scan_hpacucli_logical_drive_array_uuid,
scan_hpacucli_logical_drive_name,
scan_hpacucli_logical_drive_caching,
scan_hpacucli_logical_drive_os_device_name,
scan_hpacucli_logical_drive_type,
scan_hpacucli_logical_drive_raid_level,
scan_hpacucli_logical_drive_size,
scan_hpacucli_logical_drive_strip_size,
scan_hpacucli_logical_drive_stripe_size,
scan_hpacucli_logical_drive_status,
modified_date)
VALUES
(history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_uuid,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_host_uuid,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_array_uuid,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_name,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_caching,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_os_device_name,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_type,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_raid_level,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_size,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_strip_size,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_stripe_size,
history_scan_hpacucli_logical_drives.scan_hpacucli_logical_drive_status,
history_scan_hpacucli_logical_drives.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_hpacucli_logical_drives() OWNER TO admin;
CREATE TRIGGER trigger_scan_hpacucli_logical_drives
AFTER INSERT OR UPDATE ON scan_hpacucli_logical_drives
FOR EACH ROW EXECUTE PROCEDURE history_scan_hpacucli_logical_drives();
-- - Physical Drive: [1I:1:1], sn: [6XM4E1R60000M528BGFK]
-- - Temperature; current_temperature: [31 °C]
-- - Temperature; maximum_temperature: [40 °C]
-- - Data; drive_type: [Data Drive]
-- - Data; size: [146 GB]
-- - Data; status: [OK]
-- - Data; interface_type: [SAS]
-- - Data; model: [HP EH0146FBQDC]
-- - Data; rotational_speed: [15000]
-- - Data; phy_count: [2]
-- - Data; phy_transfer_rate: [6.0Gbps, Unknown]
-- - Data; firmware_revision: [HPD5]
-- - Data; drive_authentication_status: [OK]
-- - Data; carrier_application_version: [11]
-- - Data; carrier_bootloader_version: [6]
-- This stores information about physical disks.
CREATE TABLE scan_hpacucli_physical_drives (
scan_hpacucli_physical_drive_uuid uuid not null primary key,
scan_hpacucli_physical_drive_host_uuid uuid not null,
scan_hpacucli_physical_drive_logical_drive_uuid uuid not null,
scan_hpacucli_physical_drive_serial_number text not null,
scan_hpacucli_physical_drive_model text not null,
scan_hpacucli_physical_drive_interface text not null,
scan_hpacucli_physical_drive_status text not null,
scan_hpacucli_physical_drive_size numeric not null, -- In bytes
scan_hpacucli_physical_drive_type text not null,
scan_hpacucli_physical_drive_rpm numeric not null, -- '0' for SSDs.
scan_hpacucli_physical_drive_temperature numeric not null, -- In Celsius
scan_hpacucli_physical_drive_last_failure_reason text not null, -- This is usually an empty string
scan_hpacucli_physical_drive_port text not null, -- These three form the ID for the drive; <port>:<box>:<bay>
scan_hpacucli_physical_drive_box text not null,
scan_hpacucli_physical_drive_bay text not null,
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_hpacucli_physical_drive_host_uuid) REFERENCES hosts(host_uuid),
FOREIGN KEY(scan_hpacucli_physical_drive_logical_drive_uuid) REFERENCES scan_hpacucli_logical_drives(scan_hpacucli_logical_drive_uuid)
);
ALTER TABLE scan_hpacucli_physical_drives OWNER TO admin;
CREATE TABLE history.scan_hpacucli_physical_drives (
history_id bigserial,
scan_hpacucli_physical_drive_uuid uuid,
scan_hpacucli_physical_drive_host_uuid uuid,
scan_hpacucli_physical_drive_logical_drive_uuid uuid,
scan_hpacucli_physical_drive_serial_number text,
scan_hpacucli_physical_drive_model text,
scan_hpacucli_physical_drive_interface text,
scan_hpacucli_physical_drive_status text,
scan_hpacucli_physical_drive_size numeric,
scan_hpacucli_physical_drive_type text,
scan_hpacucli_physical_drive_rpm numeric,
scan_hpacucli_physical_drive_temperature numeric,
scan_hpacucli_physical_drive_last_failure_reason text,
scan_hpacucli_physical_drive_port text,
scan_hpacucli_physical_drive_box text,
scan_hpacucli_physical_drive_bay text,
modified_date timestamp with time zone
);
ALTER TABLE history.scan_hpacucli_physical_drives OWNER TO admin;
CREATE FUNCTION history_scan_hpacucli_physical_drives() RETURNS trigger
AS $$
DECLARE
history_scan_hpacucli_physical_drives RECORD;
BEGIN
SELECT INTO history_scan_hpacucli_physical_drives * FROM scan_hpacucli_physical_drives WHERE scan_hpacucli_physical_drive_uuid=new.scan_hpacucli_physical_drive_uuid;
INSERT INTO history.scan_hpacucli_physical_drives
(scan_hpacucli_physical_drive_uuid,
scan_hpacucli_physical_drive_host_uuid,
scan_hpacucli_physical_drive_logical_drive_uuid,
scan_hpacucli_physical_drive_serial_number,
scan_hpacucli_physical_drive_model,
scan_hpacucli_physical_drive_interface,
scan_hpacucli_physical_drive_status,
scan_hpacucli_physical_drive_size,
scan_hpacucli_physical_drive_type,
scan_hpacucli_physical_drive_rpm,
scan_hpacucli_physical_drive_temperature,
scan_hpacucli_physical_drive_last_failure_reason,
scan_hpacucli_physical_drive_port,
scan_hpacucli_physical_drive_box,
scan_hpacucli_physical_drive_bay,
modified_date)
VALUES
(history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_uuid,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_host_uuid,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_logical_drive_uuid,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_serial_number,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_model,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_interface,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_status,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_size,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_type,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_rpm,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_temperature,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_last_failure_reason,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_port,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_box,
history_scan_hpacucli_physical_drives.scan_hpacucli_physical_drive_bay,
history_scan_hpacucli_physical_drives.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_hpacucli_physical_drives() OWNER TO admin;
CREATE TRIGGER trigger_scan_hpacucli_physical_drives
AFTER INSERT OR UPDATE ON scan_hpacucli_physical_drives
FOR EACH ROW EXECUTE PROCEDURE history_scan_hpacucli_physical_drives();
-- ------------------------------------------------------------------------------------------------------- --
-- Each data type has several variables that we're not storing in the component-specific tables. To do so --
-- would be to create massive tables that would miss variables not shown for all controllers or when new --
-- variables are added or renamed. So this table is used to store all those myriad variables. Each       --
-- entry will reference the table it is attached to and the UUID of the record in that table. The column --
-- 'scan_hpacucli_variable_is_temperature' marks which entries are temperatures; those entries are then  --
-- used to inform on the host's thermal health.                                                           --
-- ------------------------------------------------------------------------------------------------------- --
-- This stores various variables found for a given controller but not explicitly checked for (or that
-- change frequently).
CREATE TABLE scan_hpacucli_variables (
scan_hpacucli_variable_uuid uuid not null primary key,
scan_hpacucli_variable_host_uuid uuid not null,
scan_hpacucli_variable_source_table text not null,
scan_hpacucli_variable_source_uuid uuid not null,
scan_hpacucli_variable_is_temperature boolean not null default FALSE,
scan_hpacucli_variable_name text not null,
scan_hpacucli_variable_value text not null,
modified_date timestamp with time zone not null,
FOREIGN KEY(scan_hpacucli_variable_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE scan_hpacucli_variables OWNER TO admin;
CREATE TABLE history.scan_hpacucli_variables (
history_id bigserial,
scan_hpacucli_variable_uuid uuid,
scan_hpacucli_variable_host_uuid uuid,
scan_hpacucli_variable_source_table text,
scan_hpacucli_variable_source_uuid uuid,
scan_hpacucli_variable_is_temperature boolean,
scan_hpacucli_variable_name text,
scan_hpacucli_variable_value text,
modified_date timestamp with time zone
);
ALTER TABLE history.scan_hpacucli_variables OWNER TO admin;
CREATE FUNCTION history_scan_hpacucli_variables() RETURNS trigger
AS $$
DECLARE
history_scan_hpacucli_variables RECORD;
BEGIN
SELECT INTO history_scan_hpacucli_variables * FROM scan_hpacucli_variables WHERE scan_hpacucli_variable_uuid=new.scan_hpacucli_variable_uuid;
INSERT INTO history.scan_hpacucli_variables
(scan_hpacucli_variable_uuid,
scan_hpacucli_variable_host_uuid,
scan_hpacucli_variable_source_table,
scan_hpacucli_variable_source_uuid,
scan_hpacucli_variable_is_temperature,
scan_hpacucli_variable_name,
scan_hpacucli_variable_value,
modified_date)
VALUES
(history_scan_hpacucli_variables.scan_hpacucli_variable_uuid,
history_scan_hpacucli_variables.scan_hpacucli_variable_host_uuid,
history_scan_hpacucli_variables.scan_hpacucli_variable_source_table,
history_scan_hpacucli_variables.scan_hpacucli_variable_source_uuid,
history_scan_hpacucli_variables.scan_hpacucli_variable_is_temperature,
history_scan_hpacucli_variables.scan_hpacucli_variable_name,
history_scan_hpacucli_variables.scan_hpacucli_variable_value,
history_scan_hpacucli_variables.modified_date);
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_scan_hpacucli_variables() OWNER TO admin;
CREATE TRIGGER trigger_scan_hpacucli_variables
AFTER INSERT OR UPDATE ON scan_hpacucli_variables
FOR EACH ROW EXECUTE PROCEDURE history_scan_hpacucli_variables();
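
Continuing the DBI sketch from the controllers table above: because every row carries its source table and source row UUID, this one generic table can hold a temperature (or any other stray value) for any component, and the thermal-health logic only has to query rows flagged as temperatures. The UUID variables below are placeholders:

# Store a controller temperature as a generic variable.
$dbh->do("INSERT INTO scan_hpacucli_variables
           (scan_hpacucli_variable_uuid, scan_hpacucli_variable_host_uuid, scan_hpacucli_variable_source_table,
            scan_hpacucli_variable_source_uuid, scan_hpacucli_variable_is_temperature, scan_hpacucli_variable_name,
            scan_hpacucli_variable_value, modified_date)
           VALUES (?, ?, 'scan_hpacucli_controllers', ?, TRUE, 'controller_temperature', ?, now())",
          undef, $variable_uuid, $host_uuid, $controller_uuid, "85");

# Later, pull every temperature on this host for the thermal checks.
my $temperatures = $dbh->selectall_arrayref("SELECT scan_hpacucli_variable_name, scan_hpacucli_variable_value FROM scan_hpacucli_variables WHERE scan_hpacucli_variable_host_uuid = ? AND scan_hpacucli_variable_is_temperature", undef, $host_uuid);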
-- - Array: [ZZZZ]
-- - Logical Drive: [9999]
-- - Physical Drive: [2I:1:8], sn: [11428100010010790594]
-- - Data; carrier_application_version: [11]
-- - Data; carrier_bootloader_version: [6]
-- - Data; device_number: [380]
-- - Data; drive_authentication_status: [OK]
-- - Data; drive_type: [Unassigned Drive]
-- - Data; firmware_revision: [1.0]
-- - Data; firmware_version: [RevB]
-- - Data; interface_type: [Solid State SATA]
-- - Data; model: [SRCv8x6G]
-- - Data; phy_count: [1]
-- - Data; phy_transfer_rate: [6.0Gbps]
-- - Data; sata_ncq_capable: [True]
-- - Data; sata_ncq_enabled: [True]
-- - Data; size: [128.0 GB]
-- - Data; ssd_smart_trip_wearout: [Not Supported]
-- - Data; status: [OK]
-- - Data; vendor_id: [PMCSIERA]
-- - Data; wwid: [5001438030E9B24F]

@@ -0,0 +1,308 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Company: Alteeve's Niche, Inc.
License: GPL v2+
Author: Madison Kelly <mkelly@alteeve.ca>
NOTE: All string keys MUST be prefixed with the agent name! ie: 'scan_hpacucli_log_0001'.
-->
<words>
<meta version="3.0.0" languages="en_CA,jp"/>
<!-- Canadian English -->
<language name="en_CA" long_name="Canadian English" description="ScanCore scan agent for HPE machines with RAID controllers managed by the 'hpacucli' tool.">
<!-- Strings that can change by rebranding -->
<key name="scan_hpacucli_brand_0001">HP Enterprise RAID controller scan agent using the 'hpacucli' tool</key>
<!-- hpacucli agent-specific messages -->
<key name="scan_hpacucli_message_0001">Starting #!string!scan_hpacucli_brand_0001!#:</key>
<key name="scan_hpacucli_message_0002">#!free!#</key>
<!-- Warnings -->
<key name="scan_hpacucli_warning_0001">Diagnostics not available for the drive: [#!variable!serial_number!#] in port: [#!variable!port!#], box: [#!variable!box!#], bay: [#!variable!bay!#]. Unable to predict failures! Is this a third-party drive?</key>
<key name="scan_hpacucli_warning_0002">The RAID controller's properties have changed:
- Model: ................. [#!variable!old_model!#] -> [#!variable!new_model!#]
- Serial Number: ......... [#!variable!old_serial_number!#] -> [#!variable!new_serial_number!#]
- Status: ................ [#!variable!old_status!#] -> [#!variable!new_status!#]
- Alarm State: ........... [#!variable!old_alarm_state!#] -> [#!variable!new_alarm_state!#]
- Cache Present: ......... [#!variable!old_cache_present!#] -> [#!variable!new_cache_present!#]
- Drive Write Cache: ..... [#!variable!old_drive_write_cache!#] -> [#!variable!new_drive_write_cache!#]
- Firmware Version: ...... [#!variable!old_firmware_version!#] -> [#!variable!new_firmware_version!#]
- Unsafe Write-Back Cache: [#!variable!old_unsafe_writeback_cache!#] -> [#!variable!new_unsafe_writeback_cache!#]
</key>
<key name="scan_hpacucli_warning_0003">The RAID controller has returned:
- Model: ................. [#!variable!new_model!#]
- Serial Number: ......... [#!variable!new_serial_number!#]
- Status: ................ [#!variable!new_status!#]
- Alarm State: ........... [#!variable!new_alarm_state!#]
- Cache Present: ......... [#!variable!new_cache_present!#]
- Drive Write Cache: ..... [#!variable!new_drive_write_cache!#]
- Firmware Version: ...... [#!variable!new_firmware_version!#]
- Unsafe Write-Back Cache: [#!variable!new_unsafe_writeback_cache!#]
</key>
<key name="scan_hpacucli_warning_0004">The RAID controller's cache module has changed:
- Serial Number: ......... [#!variable!old_serial_number!#] -> [#!variable!new_serial_number!#]
- Controller: ............ [#!variable!old_controller_serial_number!#] -> [#!variable!new_controller_serial_number!#]
- Status: ................ [#!variable!old_status!#] -> [#!variable!new_status!#]
- Type: .................. [#!variable!old_type!#] -> [#!variable!new_type!#]
- Size: .................. [#!variable!say_old_size!#] -> [#!variable!say_new_size!#]
</key>
<key name="scan_hpacucli_warning_0005">The RAID controller's cache module has returned:
- Serial Number: ......... [#!variable!new_serial_number!#]
- Controller: ............ [#!variable!new_controller_serial_number!#]
- Status: ................ [#!variable!new_status!#]
- Type: .................. [#!variable!new_type!#]
- Size: .................. [#!variable!say_new_size!#]
</key>
<!-- Errors -->
<key name="scan_hpacucli_error_0001">The 'hpacucli' program was not found at: [#!variable!path!#], exiting.</key>
<key name="scan_hpacucli_error_0002">The 'hpacucli' program was found at: [#!variable!path!#], but it is not executable. exiting.</key>
<key name="scan_hpacucli_error_0003">No HPE-type RAID controllers were found, exiting.</key>
<key name="scan_hpacucli_error_0004">Failed to find the serial number for the adapter: [#!variable!adapter!#]. Please check the output of '#!data!path::hpacucli!# #!data!sys::arguments::controller_info!#' and look for the 'Serial Number = X' string. Exiting.</key>
<key name="scan_hpacucli_error_0005">The attempt to generate the XML diagnostics file: [#!variable!file!#] appears to have failed.</key>
<key name="scan_hpacucli_error_0006">Non-numeric value in a numeric variable; controller last diagnostics (unix time): [#!variable!last_diagnostics!#]. This is likely a program error.</key>
<key name="scan_hpacucli_error_0007">Non-numeric value in a numeric variable; cache module size: [#!variable!size!#]. This is likely a program error.</key>
<key name="scan_hpacucli_error_0008">Non-numeric value in a numeric variable; array unused space: [#!variable!unused_space!#]. This is likely a program error.</key>
<key name="scan_hpacucli_error_0009">Non-numeric value in a numeric variable; logical drive size: [#!variable!logical_drive_size!#], strip size: [#!variable!strip_size!#], or stripe size: [#!variable!stripe_size!#]. This is likely a program error.</key>
<key name="scan_hpacucli_error_0010">Non-numeric value in a numeric variable; drive size: [#!variable!size!#], RPM: [#!variable!rpm!#] or temperature: [#!variable!temperature!#]. This is likely a program error.</key>
<key name="scan_hpacucli_error_0011">Failed to find the serial number of the physical drive at the following location:
- RAID Controller Serial Number: [#!variable!serial_number!#]
- Array Name: .................. [#!variable!array_name!#]
- Logical Drive Name: .......... [#!variable!logical_drive_name!#]
- Port: ........................ [#!variable!port!#]
- Box: ......................... [#!variable!box!#]
- Bay: ......................... [#!variable!bay!#]
</key>
<!-- Notices -->
<key name="scan_hpacucli_note_0001">A new HP RAID controller has been found.
- Model Name: .............. [#!variable!model!#]
- Serial Number: ........... [#!variable!serial_number!#]
- Status: .................. [#!variable!status!#]
- Drive Write Cache: ....... [#!variable!drive_write_cache!#]
- Firmware: ................ [#!variable!firmware_version!#]
- Write-Back on bad FBU/BBU: [#!variable!unsafe_writeback_cache!#]
</key>
<key name="scan_hpacucli_note_0002">A new cache module has been found.
- Serial Number: [#!variable!serial_number!#]
- Cache Size: .. [#!variable!cache_size!#]
- Status: ...... [#!variable!status!#]
- Type: ........ [#!variable!type!#]
</key>
<key name="scan_hpacucli_note_0003">Other detected variables (if any):</key>
<key name="scan_hpacucli_note_0004">- #!variable!name!#: [#!variable!value!#]</key>
<key name="scan_hpacucli_note_0005">
The temperature sensor: [#!variable!sensor_name!#] on the controller: [#!variable!serial_number!#] is above the high critical temperature of: [#!variable!high_critical_temperature!#]!:
- #!variable!name!#: [#!variable!value!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0006">
The temperature sensor: [#!variable!sensor_name!#] on the controller: [#!variable!serial_number!#] is above the high warning temperature of: [#!variable!high_warning_temperature!#]. It will go critical at: [#!variable!high_critical_temperature!#]!:
- #!variable!name!#: [#!variable!value!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0007">
The temperature sensor: [#!variable!sensor_name!#] on the controller: [#!variable!serial_number!#] is below the low critical temperature of: [#!variable!low_critical_temperature!#]!:
- #!variable!name!#: [#!variable!value!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0008">
The temperature sensor: [#!variable!sensor_name!#] on the controller: [#!variable!serial_number!#] is below the low warning temperature of: [#!variable!low_warning_temperature!#]. It will go critical at: [#!variable!low_critical_temperature!#]!:
- #!variable!name!#: [#!variable!value!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0009">- The variable: [#!variable!name!#] has changed:
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0010">- Controller: [#!variable!serial_number!#]: '#!variable!name!#' has returned: [#!variable!new_value!#]</key>
<key name="scan_hpacucli_note_0011">- Controller: [#!variable!serial_number!#]: Temperature sensor: '#!variable!name!#' is no longer critically hot.
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0012">- Controller: [#!variable!serial_number!#]: Temperature sensor: '#!variable!name!#' is no longer hot enough to be in a warning state.
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0013">- Controller: [#!variable!serial_number!#]: Temperature sensor: '#!variable!name!#' is no longer critically cold.
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0014">- Controller: [#!variable!serial_number!#]: Temperature sensor: '#!variable!name!#' is no longer cold enough to be in a warning state.
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0015">- Controller: [#!variable!serial_number!#]: Temperature sensor: '#!variable!name!#' has jumped: [#!variable!delta!#] since the last scan.
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0016">- Controller: [#!variable!serial_number!#]: Temperature sensor: '#!variable!name!#' has dropped: [#!variable!delta!#] since the last scan.
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0017">The HP RAID controller: [#!variable!model!#] with the serial number: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0018">The HP RAID cache module with the serial number: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0019">The temperature sensor: [#!variable!name!#] on the HP RAID controller with the serial number: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0020">The sensor: [#!variable!name!#] on the HP RAID controller with the serial number: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0021">A new array has been found: [#!variable!name!#]. It is a: [#!variable!type!#] array and it's status is: [#!variable!status!#]!</key>
<key name="scan_hpacucli_note_0022">A new array has been found: [#!variable!name!#] and it appears to have a problem. It is a: [#!variable!type!#] array and it's status is: [#!variable!status!#]. The error message is: [#!variable!error!#]!</key>
<key name="scan_hpacucli_note_0023">The status of the HP RAID array: [#!variable!name!#] has changed:
- [#!variable!old_status!#] -> [#!variable!new_status!#]
</key>
<key name="scan_hpacucli_note_0024">The HP RAID array: [#!variable!name!#] has changed is back to a healthy state.</key>
<key name="scan_hpacucli_note_0025">The HP RAID array: [#!variable!name!#] has moved to a new controller.
- [#!variable!old_serial_number!#] -> [#!variable!new_serial_number!#]
</key>
<key name="scan_hpacucli_note_0026">The error message for the HP RAID array: [#!variable!name!#] has changed:
- [#!variable!old_error_message!#] -> [#!variable!new_error_message!#]
</key>
<key name="scan_hpacucli_note_0027">The HP RAID array: [#!variable!name!#] has cleared the old error message: [#!variable!old_error_message!#]</key>
<key name="scan_hpacucli_note_0028">The HP RAID array: [#!variable!name!#] has an error message: [#!variable!new_error_message!#]</key>
<key name="scan_hpacucli_note_0029">The error message for the HP RAID array: [#!variable!name!#] has changed:
- [#!variable!old_error_message!#] -> [#!variable!new_error_message!#]
</key>
<key name="scan_hpacucli_note_0030">The HP RAID array: [#!variable!name!#] type has changed:
- [#!variable!old_type!#] -> [#!variable!new_type!#]
</key>
<key name="scan_hpacucli_note_0031">The HP RAID array: [#!variable!name!#] on the controller: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0032">The HP RAID array: [#!variable!name!#] on the controller: [#!variable!serial_number!#] has returned.</key>
<key name="scan_hpacucli_note_0033">The HP RAID array: [#!variable!name!#] has a new logical drive: [#!variable!logical_drive!#]:
- Status: .............. [#!variable!new_status!#]
- Write-Back Caching: .. [#!variable!new_caching!#]
- Device Name in the OS: [#!variable!new_os_device_name!#]
- Drive Type: .......... [#!variable!new_type!#]
- RAID Level: .......... [#!variable!new_raid_level!#]
- Logical Drive Size: .. [#!variable!new_size!#]
- Strip Size: .......... [#!variable!new_strip_size!#]
- Stripe Size: ......... [#!variable!new_stripe_size!#]
</key>
<key name="scan_hpacucli_note_0034">The write-back caching on the HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] has changed!
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0035">The write-back caching has been re-enabled on the HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#].</key>
<key name="scan_hpacucli_note_0036">The write-back caching has been disabled on the HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#].
WARNING: Storage performance can be significantly impacted when write-back caching has been disabled! This can be caused by a failed battery or capacitor on the RAID controller.
</key>
<key name="scan_hpacucli_note_0037">The status on the HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] has changed!
- [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0038">The HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] is back to normal.</key>
<key name="scan_hpacucli_note_0039">The HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] is has begun recovering.
NOTE: The array is still degraded, and will remain so until the rebuild process is complete. How long this rebuild will take is a factor of the replacement drive's speed and size.
</key>
<key name="scan_hpacucli_note_0040">The rebuild of the HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] is now at: [#!variable!recovered!# %].</key>
<key name="scan_hpacucli_note_0041">The HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] is degraded!
WARNING: This is generally caused by a drive failing or having been removed. Please replace the drive as soon as possible.
</key>
<key name="scan_hpacucli_note_0042">The HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] has changed!
- Drive Name in OS: [#!variable!old_os_drive_name!#] -> [#!variable!new_os_drive_name!#]
- Drive Type: ..... [#!variable!old_type!#] -> [#!variable!new_type!#]
- RAID Level: ..... [#!variable!old_raid_level!#] -> [#!variable!new_raid_level!#]
- Drive Size: ..... [#!variable!old_size!#] -> [#!variable!new_size!#]
- Strip Size: ..... [#!variable!old_strip_size!#] -> [#!variable!new_strip_size!#]
- Stripe Size: .... [#!variable!old_stripe_size!#] -> [#!variable!new_stripe_size!#]
</key>
<key name="scan_hpacucli_note_0043">A variable on the HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] has changed!
- #!variable!variable_name!#: [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0044">The HP RAID logical drive: [#!variable!logical_drive!#] has vanished!</key>
<key name="scan_hpacucli_note_0045">The HP RAID logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] has returned!</key>
<key name="scan_hpacucli_note_0046">A new physical disk was found on the HP RAID controller: [#!variable!controller_serial_number!#]:
- Serial Number: ..... [#!variable!drive_serial_number!#]
- Model: ............. [#!variable!model!#]
- Size: .............. [#!variable!size!#]
- Status: ............ [#!variable!status!#]
- Temperature: ....... [#!variable!temperature!#]
- Interface: ......... [#!variable!interface!#]
- Location: .......... [#!variable!port!#:#!variable!box!#:#!variable!bay!#] (Port:Box:Bay)
- Type: .............. [#!variable!rpm!#]
- Array: ............. [#!variable!array_name!#]
- Logical Drive: ..... [#!variable!logical_drive_name!#]
- Last Failure Reason: [#!variable!last_failure_reason!#]
</key>
<key name="scan_hpacucli_note_0047">The physical disk: [#!variable!drive_serial_number!#] has moved to a new logical disk.
- Host: ........................ [#!variable!old_host_name!#] -> [#!variable!new_host_name!#]
- RAID Controller Serial Number: [#!variable!old_controller_serial_number!#] -> [#!variable!new_controller_serial_number!#]
- Array Name: .................. [#!variable!old_array_name!#] -> [#!variable!new_array_name!#]
- Logical Drive Name: .......... [#!variable!old_logical_drive_name!#] -> [#!variable!new_logical_drive_name!#]
</key>
<key name="scan_hpacucli_note_0048">The status of the physical disk [#!variable!serial_number!#] has changed! [#!variable!old_status!#] -> [#!variable!new_status!#]</key>
<key name="scan_hpacucli_note_0049">The physical disk: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0050">The physical disk: [#!variable!serial_number!#] has returned.</key>
<key name="scan_hpacucli_note_0051">The physical disk: [#!variable!serial_number!#] is back to normal.</key>
<key name="scan_hpacucli_note_0052">The physical disk: [#!variable!serial_number!#] has changed in an unusual way:
- Model: ............. [#!variable!old_model!#] -> [#!variable!new_model!#]
- Interface: ......... [#!variable!old_interface!#] -> [#!variable!new_interface!#]
- Size: .............. [#!variable!old_size!#] -> [#!variable!new_size!#]
- RPM: ............... [#!variable!old_rpm!#] -> [#!variable!new_rpm!#]
- Last Failure Reason: [#!variable!old_last_failure_reason!#] -> [#!variable!new_last_failure_reason!#]
- Location: .......... [#!variable!old_port!#:#!variable!old_box!#:#!variable!old_bay!#] -> [#!variable!new_port!#:#!variable!new_box!#:#!variable!new_bay!#] (Port:Box:Bay)
</key>
<key name="scan_hpacucli_note_0053">The temperature of the physical disk [#!variable!serial_number!#] has changed: [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]</key>
<key name="scan_hpacucli_note_0054">The temperature of the physical disk: [#!variable!serial_number!#] is no longer critically hot.
- [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]
</key>
<key name="scan_hpacucli_note_0055">The temperature of the physical disk: [#!variable!serial_number!#] is no longer hot enough to be in a warning state.
- [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]
</key>
<key name="scan_hpacucli_note_0056">The temperature of the physical disk: [#!variable!serial_number!#] is no longer critically cold.
- [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]
</key>
<key name="scan_hpacucli_note_0057">The temperature of the physical disk: [#!variable!serial_number!#] is no longer cold enough to be in a warning state.
- [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]
</key>
<key name="scan_hpacucli_note_0058">The temperature of the physical disk: [#!variable!serial_number!#] has jumped: [#!variable!delta!#] since the last scan.
- [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]
</key>
<key name="scan_hpacucli_note_0059">The temperature of the physical disk: [#!variable!serial_number!#] has dropped: [#!variable!delta!#] since the last scan.
- [#!variable!old_temperature!#] -> [#!variable!new_temperature!#]
</key>
<key name="scan_hpacucli_note_0060">
The temperature of the physical drive: [#!variable!serial_number!#] is above the high critical temperature of: [#!variable!high_critical_temperature!#]!:
- Current temperature: [#!variable!new_temperature!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough temperature sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0061">
The temperature of the physical drive: [#!variable!serial_number!#] is above the high warning temperature of: [#!variable!high_warning_temperature!#]. It will go critical at: [#!variable!high_critical_temperature!#]!:
- Current temperature: [#!variable!new_temperature!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough temperature sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0062">
The temperature of the physical drive: [#!variable!serial_number!#] is below the low critical temperature of: [#!variable!low_critical_temperature!#]!:
- Current temperature: [#!variable!new_temperature!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0063">
The temperature of the physical drive: [#!variable!serial_number!#] is below the low warning temperature of: [#!variable!low_warning_temperature!#]. It will go critical at: [#!variable!low_critical_temperature!#]!:
- Current temperature: [#!variable!new_temperature!#]
NOTE: If the other node is cooler, automatic live migration of hosted servers (if any) will occur soon.
NOTE: If enough sensors go into warning or critical on both nodes, load shedding will occur to slow room heating.
WARNING: If enough sensors go critical, emergency power off will occur to protect the node from damage.
</key>
<key name="scan_hpacucli_note_0064">The new physical disk with the serial number: [#!variable!drive_serial_number!#] is not healthy!</key>
<key name="scan_hpacucli_note_0065">A variable on the physical drive: [#!variable!serial_number!#] has changed:
- #!variable!variable_name!#: [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0066">A variable on the logical drive: [#!variable!logical_drive!#] under the array: [#!variable!array!#] on the controller: [#!variable!serial_number!#] has changed:
- #!variable!variable_name!#: [#!variable!old_value!#] -> [#!variable!new_value!#]
</key>
<key name="scan_hpacucli_note_0067">The variable: [#!variable!name!#] on the physical drive with the serial number: [#!variable!serial_number!#] has vanished!</key>
<key name="scan_hpacucli_note_0068">The diagnostics variable: [#!variable!name!#] on the physical drive with the serial number: [#!variable!serial_number!#] has vanished!</key>
<!-- Log strings -->
<key name="scan_hpacucli_log_0001">Found: [#!variable!count!#] controller(s).</key>
<key name="scan_hpacucli_log_0002">The thermal sensor named: [#!variable!sensor_name!#], on: [#!variable!sensor_host!#] has not changed.</key>
<key name="scan_hpacucli_log_0003">Running: [#!variable!shell_call!#] to gather drive diagnostics. This will take up to fifteen seconds to run.</key>
</language>
</words>
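These strings are rendered by Anvil's word-string engine, which substitutes the '#!variable!name!#' placeholders at display time. A minimal sketch, assuming the Words->string({key, variables}) interface used elsewhere in Anvil::Tools (the drive serial and temperature values here are hypothetical):

# Render a words.xml key, substituting its #!variable!...!# placeholders.
my $message = $anvil->Words->string({
	key       => "scan_hpacucli_note_0065",
	variables => {
		serial_number => "ABC123XYZ",    # hypothetical drive serial
		variable_name => "temperature",
		old_value     => "31",
		new_value     => "34",
	},
});
print $message."\n";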

@ -387,6 +387,12 @@ The attempt to start the servers appears to have failed. The return code '0' was
#!variable!output!#
====
</key>
<key name="error_0276"><![CDATA[No server specified to rename. Please use '--server <name>' or '--server-uuid <UUID>.]]></key>
<key name="error_0277">Could not find the server: [#!variable!server!#] on this Anvil! in the database.</key>
<key name="error_0278">This host is not a node, unable to rename the server from here.</key>
<key name="error_0279"><![CDATA[The new name for the server was not given. Please use '--new-name <name>'. The new name can not contain spaces.]]></key>
<key name="error_0280"><![CDATA[The new name for the server: [#!variable!new_name!#] is not valid. The new name can not contain spaces.]]></key>
<key name="error_0281">The server wasn't found in the cluster configuration... Did a previous attempt to rename fail? Aborting.</key>
<!-- Files templates -->
<!-- NOTE: Translating these files requires an understanding of which lines are translatable -->
@ -799,6 +805,16 @@ It should be provisioned in the next minute or two.</key>
<key name="job_0292">The server: [#!variable!server!#] has been migrated to: [#!variable!target!#].</key>
<key name="job_0293">The server: [#!variable!server!#] will now be migrated to: [#!variable!target!#]. This could take some time! How much RAM is allocated to this server, the speed of the back-channel network and how busy the server is all contribute to migration time. Please be patient!</key>
<key name="job_0294">The server: [#!variable!server!#] has been asked to migrate. We are not waiting for it to complete.</key>
<key name="job_0295">The cluster is up and both nodes are ready.</key>
<key name="job_0296">The cluster is up and both one or both nodes are not yet ready. Will wait until both are up. Current states; [#!variable!local_name!#] is: [#!variable!local_ready!#], and [#!variable!peer_name!#] is: [#!variable!peer_ready!#].</key>
<key name="job_0297">The peer: [#!variable!host_name!#] can't be reached yet. Will wait for it to be available before proceeding with the rename.</key>
<key name="job_0298">The peer(s) of this server are accessible. Ready to proceed with the rename.</key>
<key name="job_0299">The server: [#!variable!server!#] status is: [#!variable!status!#]. Waiting for it to be off.</key>
<key name="job_0300">The server: [#!variable!server!#] is verified to be off everywhere.</key>
<key name="job_0301">The DRBD connection from: [#!variable!source_host!#] to: [#!variable!peer_host!#] for the resource/volume: [#!variable!resource!#/#!variable!volume!#] is: [#!variable!replication_state!#]. Will wait for the sync to finish before taking down the resource.</key>
<key name="job_0302">The DRBD resource behind the server is ready to be taken down.</key>
<key name="job_0303">Taking down the DRBD resource: [#!variable!resource!#] on the peer: [#!variable!peer!#] via the IP: [#!variable!ip!#].</key>
<key name="job_0304">The DRBD resource is down.</key>
<!-- Log entries -->
<key name="log_0001">Starting: [#!variable!program!#].</key>
@ -1836,6 +1852,7 @@ Are you sure that you want to delete the server: [#!variable!server_name!#]? [Ty
<key name="message_0231">The 'anvil-safe-start' tool is disabled, exiting. Use '--force' to run anyway.</key>
<key name="message_0232">The 'anvil-safe-start' tool is disabled, but '--force' was used, so proceeding.</key>
<key name="message_0233">It appears that another instance of 'anvil-safe-start' is already runing. Please wait for it to complete (or kill it manually if needed).</key>
<key name="message_0234">Preparing to rename a server.</key>
<!-- Success messages shown to the user -->
<key name="ok_0001">Saved the mail server information successfully!</key>

@ -14,7 +14,7 @@
use strict;
use warnings;
use Anvil::Tools;
require POSIX;
use Data::Dumper;
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
my $running_directory = ($0 =~ /^(.*?)\/$THIS_FILE$/)[0];
@ -30,15 +30,17 @@ my $anvil = Anvil::Tools->new();
# Read switches. The server to rename can be given by name (--server) or by UUID (--server-uuid), the new
# name is set with --new-name, and --job-uuid lets this tool run as a queued job.
$anvil->data->{switches}{'job-uuid'} = "";
$anvil->data->{switches}{'new-name'} = "";
$anvil->data->{switches}{'server'} = "";
$anvil->data->{switches}{'server-uuid'} = "";
$anvil->Get->switches;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 2, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::job-uuid' => $anvil->data->{switches}{'job-uuid'},
'switches::new-name' => $anvil->data->{switches}{'new-name'},
'switches::server' => $anvil->data->{switches}{'server'},
'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
}});
$anvil->Database->connect();
@ -70,18 +72,142 @@ if ($anvil->data->{switches}{'job-uuid'})
progress => 1,
job_picked_up_by => $$,
job_picked_up_at => time,
message => "message_0190",
message => "message_0234",
});
# Pull out the job data.
foreach my $line (split/\n/, $anvil->data->{jobs}{job_data})
{
if ($line =~ /server=(.*?)$/)
{
$anvil->data->{switches}{'server'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::server' => $anvil->data->{switches}{'server'},
}});
}
if ($line =~ /server-uuid=(.*?)$/)
{
$anvil->data->{switches}{'server-uuid'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
}});
}
if ($line =~ /new-name=(.*?)$/)
{
$anvil->data->{switches}{'new-name'} = $1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::new-name' => $anvil->data->{switches}{'new-name'},
}});
}
}
}
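# For reference, a sketch of the job_data payload the loop above parses: one
# 'key=value' pair per line, matching the switches below (the server name,
# UUID and new name here are hypothetical).
#
#   server=srv01
#   server-uuid=abcd1234-abcd-1234-abcd-1234567890ab
#   new-name=srv01-new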
# Make sure we're in an Anvil!
$anvil->data->{sys}{anvil_uuid} = $anvil->Cluster->get_anvil_uuid();
if (not $anvil->data->{sys}{anvil_uuid})
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0260"});
$anvil->Job->update_progress({progress => 100, message => "error_0260"});
$anvil->nice_exit({exit_code => 1});
}
# Now check that we have a server. If it's a server_uuid, read the server name.
$anvil->Database->get_servers();
if ($anvil->data->{switches}{'server-uuid'})
{
# Convert the server_uuid to a server_name.
my $server_uuid = $anvil->data->{switches}{'server-uuid'};
if (not exists $anvil->data->{servers}{server_uuid}{$server_uuid})
{
# Invalid server UUID.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0269", variables => {
server_uuid => $anvil->data->{switches}{'server-uuid'},
}});
$anvil->Job->update_progress({progress => 100, message => "error_0269,!!server_uuid!".$anvil->data->{switches}{'server-uuid'}."!!"});
$anvil->nice_exit({exit_code => 1});
}
$anvil->data->{switches}{'server'} = $anvil->data->{servers}{server_uuid}{$server_uuid}{server_name};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::server' => $anvil->data->{switches}{'server'},
}});
}
# Do we have a server name?
if (not $anvil->data->{switches}{'server'})
{
# Unable to proceed.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0276"});
$anvil->Job->update_progress({progress => 100, message => "error_0276"});
$anvil->nice_exit({exit_code => 1});
}
# Do we have a new server name?
if (not $anvil->data->{switches}{'new-name'})
{
# Unable to proceed.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0279"});
$anvil->Job->update_progress({progress => 100, message => "error_0279"});
$anvil->nice_exit({exit_code => 1});
}
# Trim leading/trailing whitespace, then make sure there are no spaces left in the name.
$anvil->data->{switches}{'new-name'} =~ s/^\s+//;
$anvil->data->{switches}{'new-name'} =~ s/\s+$//;
if ($anvil->data->{switches}{'new-name'} =~ /\s/)
{
# Bad new server name
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0280", variables => {
new_name => $anvil->data->{switches}{'new-name'},
}});
$anvil->Job->update_progress({progress => 100, message => "error_0280,!!new_name!".$anvil->data->{switches}{'new-name'}."!!"});
$anvil->nice_exit({exit_code => 1});
}
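# For example (hypothetical names), ' srv01-new ' is trimmed to 'srv01-new'
# and accepted, while 'srv01 new' is rejected.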
# We're going to need a server UUID. If we don't have it, find it from the current name.
if (not $anvil->data->{switches}{'server-uuid'})
{
# Convert the server name to a server_uuid.
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $server_name = $anvil->data->{switches}{'server'};
if (not exists $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name})
{
# Invalid server name.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0277", variables => { server => $server_name }});
$anvil->Job->update_progress({progress => 100, message => "error_0277,!!server!".$server_name."!!"});
$anvil->nice_exit({exit_code => 1});
}
$anvil->data->{switches}{'server-uuid'} = $anvil->data->{servers}{anvil_uuid}{$anvil_uuid}{server_name}{$server_name}{server_uuid};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
'switches::server-uuid' => $anvil->data->{switches}{'server-uuid'},
}});
}
# Are we a node?
$anvil->data->{sys}{host_type} = $anvil->Get->host_type();
if ($anvil->data->{sys}{host_type} ne "node")
{
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0264"});
$anvil->Job->update_progress({progress => 100, message => "error_0264"});
$anvil->nice_exit({exit_code => 1});
}
# This is copied from anvil-boot-server, but it works here as well. We can't use 'pcs' without pacemaker
# being up.
wait_for_pacemaker($anvil);
# Now we're ready.
gather_server_data($anvil);
# Verify that the server is off everywhere.
verify_server_is_off($anvil);
# Now start renaming things.
#rename_server($anvil);
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0281"});
$anvil->Job->update_progress({progress => 100, message => "job_0281"});
$anvil->nice_exit({exit_code => 0});
@ -90,12 +216,408 @@ $anvil->nice_exit({exit_code => 0});
# Functions #
#############################################################################################################
# This does the actual rename. It removes the resource from the cluster, makes sure the DRBD resource is down
# on all machines, and renames the XML definition file.
# Calls virsh locally and on peer(s) to ensure that the server is not running.
sub verify_server_is_off
{
my ($anvil) = @_;
# Is the server running from pacemaker's perspective?
my $waiting = 1;
my $old_server_name = $anvil->data->{switches}{'server'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { old_server_name => $old_server_name }});
while ($waiting)
{
$waiting = 0;
$anvil->Cluster->parse_cib({debug => 2});
if (not exists $anvil->data->{cib}{parsed}{data}{server}{$old_server_name})
{
# Server wasn't found in the cluster config. Wat?!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 0, priority => "err", key => "error_0281"});
$anvil->Job->update_progress({progress => 100, message => "error_0281"});
$anvil->nice_exit({exit_code => 1});
}
my $status = $anvil->data->{cib}{parsed}{data}{server}{$old_server_name}{status};
my $host_name = $anvil->data->{cib}{parsed}{data}{server}{$old_server_name}{host_name};
my $role = $anvil->data->{cib}{parsed}{data}{server}{$old_server_name}{role};
my $active = $anvil->data->{cib}{parsed}{data}{server}{$old_server_name}{active};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:old_server_name' => $old_server_name,
's2:status' => $status,
's3:host_name' => $host_name,
's4:role' => $role,
's5:active' => $active,
}});
if ($status ne "off")
{
$waiting = 1;
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0299", variables => {
server => $old_server_name,
status => $status,
}});
$anvil->Job->update_progress({progress => 22, message => "job_0299,!!server!".$old_server_name."!!,!!status!".$status."!!"});
sleep 10;
}
}
# Now check virsh.
$waiting = 1;
while ($waiting)
{
$waiting = 0;
$anvil->Server->find({refresh => 1});
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{rename_server}{host}})
{
next if not $anvil->data->{rename_server}{host}{$host_name}{is_peer};
my $peers_ip = $anvil->data->{rename_server}{host}{$host_name}{use_ip};
my $password = $anvil->data->{rename_server}{host}{$host_name}{password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peers_ip => $peers_ip,
password => $anvil->Log->is_secure($password),
}});
$anvil->Server->find({
refresh => 0,
target => $peers_ip,
password => $password,
});
}
if ((exists $anvil->data->{server}{location}{$old_server_name}) && ($anvil->data->{server}{location}{$old_server_name}{status} ne "shut off"))
{
# The server is still visible to virsh somewhere, so keep waiting.
$waiting = 1;
my $status = $anvil->data->{server}{location}{$old_server_name}{status};
my $host = $anvil->data->{server}{location}{$old_server_name}{host_name};
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0299", variables => {
server => $old_server_name,
status => $status,
host => $host,
}});
$anvil->Job->update_progress({progress => 26, message => "job_0299,!!server!".$old_server_name."!!,!!status!".$status."!!,!!host!".$host."!!"});
sleep 10;
}
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0300", variables => { server => $old_server_name }});
$anvil->Job->update_progress({progress => 28, message => "job_0300,!!server!".$old_server_name."!!"});
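# For clarity, a sketch of the hash consulted above. Server->find() appears to
# record where virsh last saw each server; the host name and state here are
# hypothetical examples.
#
#   $anvil->data->{server}{location}{$old_server_name}{status}    = "shut off";
#   $anvil->data->{server}{location}{$old_server_name}{host_name} = "an-a01n01";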
# Now make sure the DRBD resource is down on all machines.
my $short_host_name = $anvil->Get->short_host_name();
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { short_host_name => $short_host_name }});
# Wait until the resource is not sync'ing (if it is at all).
$waiting = 1;
while ($waiting)
{
# (Re)fresh my view of the storage.
$waiting = 0;
$anvil->DRBD->get_status({debug => 2});
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{rename_server}{host}})
{
next if not $anvil->data->{rename_server}{host}{$host_name}{is_peer};
my $peers_ip = $anvil->data->{rename_server}{host}{$host_name}{use_ip};
my $password = $anvil->data->{rename_server}{host}{$host_name}{password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peers_ip => $peers_ip,
password => $anvil->Log->is_secure($password),
}});
$anvil->DRBD->get_status({
debug => 2,
target => $peers_ip,
password => $password,
});
}
# Now check to see if anything is sync'ing.
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
's1:old_server_name' => $old_server_name,
's2:host_name' => $host_name,
}});
next if not exists $anvil->data->{drbd}{status}{$host_name}{resource}{$old_server_name};
foreach my $peer_name (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host_name}{resource}{$old_server_name}{connection}})
{
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peer_name => $peer_name }});
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{drbd}{status}{$host_name}{resource}{$old_server_name}{connection}{$peer_name}{volume}})
{
next if not exists $anvil->data->{drbd}{status}{$host_name}{resource}{$old_server_name}{connection}{$peer_name}{volume}{$volume}{'replication-state'};
my $replication_state = $anvil->data->{drbd}{status}{$host_name}{resource}{$old_server_name}{connection}{$peer_name}{volume}{$volume}{'replication-state'};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
volume => $volume,
replication_state => $replication_state,
}});
if ($replication_state =~ /Sync/i)
{
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0301", variables => {
source_host => $host_name,
peer_host => $peer_name,
resource => $old_server_name,
volume => $volume,
replication_state => $replication_state,
}});
$anvil->Job->update_progress({progress => 30, message => "job_0301,!!source_host!".$host_name."!!,!!peer_host!".$peer_name."!!,!!resource!".$old_server_name."!!,!!volume!".$volume."!!,!!replication_state!".$replication_state."!!"});
}
}
}
}
if ($waiting)
{
sleep 10;
}
}
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0302"});
$anvil->Job->update_progress({progress => 33, message => "job_0302"});
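# A sketch of the status hash walked above, as populated by DRBD->get_status()
# (host, peer and volume values are hypothetical). In DRBD 9, a replication
# state matching /Sync/ (such as 'SyncSource' or 'SyncTarget') means a resync
# is still running, while 'Established' means the volume is in sync.
#
#   $anvil->data->{drbd}{status}{'an-a01n01'}{resource}{$old_server_name}{connection}{'an-a01n02'}{volume}{0}{'replication-state'} = "Established";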
# Shut down the peers first
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{rename_server}{host}})
{
next if not $anvil->data->{rename_server}{host}{$host_name}{is_peer};
my $peers_ip = $anvil->data->{rename_server}{host}{$host_name}{use_ip};
my $password = $anvil->data->{rename_server}{host}{$host_name}{password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
peers_ip => $peers_ip,
password => $anvil->Log->is_secure($password),
}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0303", variables => {
peer => $host_name,
resource => $old_server_name,
ip => $peers_ip,
}});
$anvil->Job->update_progress({progress => 35, message => "job_0303,!!peer!".$host_name."!!,!!resource!".$old_server_name."!!,!!ip!".$peers_ip."!!"});
$anvil->DRBD->manage_resource({
debug => 2,
resource => $old_server_name,
task => "down",
target => $peers_ip,
password => $password,
});
}
$anvil->DRBD->manage_resource({
debug => 2,
resource => $old_server_name,
task => "down",
});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0304"});
$anvil->Job->update_progress({progress => 38, message => "job_0304"});
return(0);
}
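# Note: DRBD->manage_resource() with task => "down" is presumed to be the
# equivalent of running the following on the given host (resource name
# hypothetical):
#
#   drbdadm down srv01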
# This figures out the names of the definition and DRBD resource files, LV names and other details that will
# be needed to rename the server. This will abort if anything seems wrong.
sub gather_server_data
{
my ($anvil) = @_;
my $old_server_name = $anvil->data->{switches}{'server'};
my $new_server_name = $anvil->data->{switches}{'new-name'};
$anvil->data->{rename_server}{old_definition_file} = $anvil->data->{path}{directories}{shared}{definitions}."/".$old_server_name.".xml";
$anvil->data->{rename_server}{new_definition_file} = $anvil->data->{path}{directories}{shared}{definitions}."/".$new_server_name.".xml";
$anvil->data->{rename_server}{old_drbd_resource_file} = $anvil->data->{path}{directories}{drbd_resources}."/".$old_server_name.".res";
$anvil->data->{rename_server}{new_drbd_resource_file} = $anvil->data->{path}{directories}{drbd_resources}."/".$new_server_name.".res";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"rename_server::old_definition_file" => $anvil->data->{rename_server}{old_definition_file},
"rename_server::new_definition_file" => $anvil->data->{rename_server}{new_definition_file},
"rename_server::old_drbd_resource_file" => $anvil->data->{rename_server}{old_drbd_resource_file},
"rename_server::new_drbd_resource_file" => $anvil->data->{rename_server}{new_drbd_resource_file},
}});
# Parse the DRBD resource file to see if we have a DR target for this server.
$anvil->DRBD->gather_data({debug => 2});
$anvil->Database->get_hosts();
# We'll store our name for finding matches later.
my $local_drbd_node_name = "";
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$old_server_name}{host}})
{
my $host_uuid = $anvil->Get->host_uuid_from_name({host_name => $host_name});
my $host_type = $anvil->data->{hosts}{host_uuid}{$host_uuid}{host_type};
my $peer = $host_uuid eq $anvil->Get->host_uuid ? 0 : 1;
$anvil->data->{rename_server}{host}{$host_name}{host_uuid} = $host_uuid;
$anvil->data->{rename_server}{host}{$host_name}{host_type} = $host_type;
$anvil->data->{rename_server}{host}{$host_name}{is_peer} = $peer;
$anvil->data->{rename_server}{host}{$host_name}{use_ip} = "";
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"rename_server::host::${host_name}::host_uuid" => $anvil->data->{rename_server}{host}{$host_name}{host_uuid},
"rename_server::host::${host_name}::host_type" => $anvil->data->{rename_server}{host}{$host_name}{host_type},
"rename_server::host::${host_name}::is_peer" => $anvil->data->{rename_server}{host}{$host_name}{is_peer},
}});
if (not $peer)
{
$local_drbd_node_name = $host_name;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { local_drbd_node_name => $local_drbd_node_name }});
$anvil->Network->load_ips({
host => $local_drbd_node_name,
host_uuid => $host_uuid,
});
}
foreach my $volume (sort {$a cmp $b} keys %{$anvil->data->{new}{resource}{$old_server_name}{host}{$host_name}{volume}})
{
my $old_device_path = $anvil->data->{new}{resource}{$old_server_name}{host}{$host_name}{volume}{$volume}{device_path};
my $new_device_path = $old_device_path;
$new_device_path =~ s/\Q$old_server_name\E/$new_server_name/g;
my $old_backing_disk = $anvil->data->{new}{resource}{$old_server_name}{host}{$host_name}{volume}{$volume}{backing_disk};
my $new_backing_disk = $old_backing_disk;
$new_backing_disk =~ s/\Q$old_server_name\E/$new_server_name/g;
$anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{old_device_path} = $old_device_path;
$anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{new_device_path} = $new_device_path;
$anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{old_backing_disk} = $old_backing_disk;
$anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{new_backing_disk} = $new_backing_disk;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"rename_server::host::${host_name}::volume::${volume}::old_device_path" => $anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{old_device_path},
"rename_server::host::${host_name}::volume::${volume}::new_device_path" => $anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{new_device_path},
"rename_server::host::${host_name}::volume::${volume}::old_backing_disk" => $anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{old_backing_disk},
"rename_server::host::${host_name}::volume::${volume}::new_backing_disk" => $anvil->data->{rename_server}{host}{$host_name}{volume}{$volume}{new_backing_disk},
}});
}
}
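# A worked example of the substitution above, with hypothetical names: if the
# server 'srv01' is being renamed to 'srv01-new', a backing disk of
# '/dev/an-a01n01_vg0/srv01_0' becomes '/dev/an-a01n01_vg0/srv01-new_0', and
# the matching DRBD device path is rewritten the same way.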
# Make sure we can talk to peers.
my $waiting = 1;
while($waiting)
{
$waiting = 0;
foreach my $host_name (sort {$a cmp $b} keys %{$anvil->data->{rename_server}{host}})
{
next if not $anvil->data->{rename_server}{host}{$host_name}{is_peer};
my $host_uuid = $anvil->data->{rename_server}{host}{$host_name}{host_uuid};
my $anvil_uuid = $anvil->data->{sys}{anvil_uuid};
my $password = $anvil->data->{anvils}{anvil_uuid}{$anvil_uuid}{anvil_password};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
host_name => $host_name,
host_uuid => $host_uuid,
anvil_uuid => $anvil_uuid,
password => $anvil->Log->is_secure($password),
}});
$anvil->Network->load_ips({
host => $host_name,
host_uuid => $host_uuid,
});
my $peers_ip = "";
my ($match) = $anvil->Network->find_matches({
debug => 2,
first => $local_drbd_node_name,
second => $host_name,
});
my $access = 0;
if ($match)
{
# Yup!
foreach my $interface (sort {$a cmp $b} keys %{$match->{$host_name}})
{
my $peers_ip = $match->{$host_name}{$interface}{ip};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { peers_ip => $peers_ip }});
$access = $anvil->Remote->test_access({
target => $peers_ip,
password => $password,
});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { access => $access }});
if ($access)
{
$anvil->data->{rename_server}{host}{$host_name}{use_ip} = $peers_ip;
$anvil->data->{rename_server}{host}{$host_name}{password} = $password;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
"rename_server::host::${host_name}::use_ip" => $anvil->data->{rename_server}{host}{$host_name}{use_ip},
}});
last;
}
}
}
if (not $access)
{
# Unable to reach this peer, so we need to keep waiting.
$waiting = 1;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0297", variables => { host_name => $host_name }});
$anvil->Job->update_progress({progress => 18, message => "job_0297,!!host_name!".$host_name."!!"});
}
}
if ($waiting)
{
sleep 10;
}
}
# All peer(s) are ready!
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0298"});
$anvil->Job->update_progress({progress => 20, message => "job_0298"});
return(0);
}
sub wait_for_pacemaker
{
my ($anvil) = @_;
# We need to rename the server in the cluster, and we need both nodes up to do it.
my $waiting = 1;
while($waiting)
{
my $problem = $anvil->Cluster->parse_cib({debug => 2});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { problem => $problem }});
if (not $problem)
{
my $local_name = $anvil->data->{cib}{parsed}{'local'}{name};
my $peer_name = $anvil->data->{cib}{parsed}{peer}{name};
my $local_ready = $anvil->data->{cib}{parsed}{data}{node}{$local_name}{node_state}{ready};
my $peer_ready = $anvil->data->{cib}{parsed}{data}{node}{$peer_name}{node_state}{ready};
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => {
local_name => $local_name,
peer_name => $peer_name,
local_ready => $local_ready,
peer_ready => $peer_ready,
}});
if (($local_ready) && ($peer_ready))
{
# We're good.
$waiting = 0;
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 2, list => { waiting => $waiting }});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0295"});
$anvil->Job->update_progress({progress => 15, message => "job_0295"});
}
else
{
# One or both nodes are not online yet.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0296", variables => {
local_name => $local_name,
peer_name => $peer_name,
local_ready => $local_ready,
peer_ready => $peer_ready,
}});
$anvil->Job->update_progress({progress => 10, message => "job_0296,!!local_name!".$local_name."!!,!!peer_name!".$peer_name."!!,!!local_ready!".$local_ready."!!,!!peer_ready!".$peer_ready."!!"});
}
}
else
{
# Cluster hasn't started.
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => 1, level => 1, key => "job_0277"});
$anvil->Job->update_progress({progress => 5, message => "job_0277"});
}
if ($waiting)
{
sleep 10;
}
}
return(0);
}
