anvil/notes

BEGIN TRANSACTION;

-- recipients: 'recipient_new_level' becomes 'recipient_level' and 'recipient_units' is removed. The same
-- two changes are applied to the history copy first, then to the live table.
ALTER TABLE history.recipients
    RENAME COLUMN recipient_new_level TO recipient_level;
ALTER TABLE history.recipients
    DROP COLUMN recipient_units;
ALTER TABLE recipients
    RENAME COLUMN recipient_new_level TO recipient_level;
ALTER TABLE recipients
    DROP COLUMN recipient_units;

-- alert_sent: 'alert_name' is removed.
ALTER TABLE alert_sent
    DROP COLUMN alert_name;

-- Rebuild the history trigger function for 'recipients'. CASCADE also drops objects that depend on the
-- function, so the trigger is created again at the end of this section.
DROP FUNCTION history_recipients() CASCADE;

CREATE FUNCTION history_recipients() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    recipient_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO recipient_row
      FROM recipients
     WHERE recipient_uuid = NEW.recipient_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.recipients (
        recipient_uuid,
        recipient_name,
        recipient_email,
        recipient_language,
        recipient_level,
        modified_date
    ) VALUES (
        recipient_row.recipient_uuid,
        recipient_row.recipient_name,
        recipient_row.recipient_email,
        recipient_row.recipient_language,
        recipient_row.recipient_level,
        recipient_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_recipients() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'recipients'.
CREATE TRIGGER trigger_recipients
    AFTER INSERT OR UPDATE
    ON recipients
    FOR EACH ROW
    EXECUTE PROCEDURE history_recipients();

-- Add 'alert_processed' to both copies of the alerts table. The live column is mandatory and defaults to
-- 0; the history column carries no constraint.
ALTER TABLE alerts
    ADD COLUMN alert_processed integer NOT NULL DEFAULT 0;
ALTER TABLE history.alerts
    ADD COLUMN alert_processed integer;

-- Rebuild the history trigger function so it also copies 'alert_processed'. CASCADE also drops objects
-- that depend on the function, so the trigger is created again at the end of this section.
DROP FUNCTION history_alerts() CASCADE;

CREATE FUNCTION history_alerts() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    alert_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO alert_row
      FROM alerts
     WHERE alert_uuid = NEW.alert_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.alerts (
        alert_uuid,
        alert_host_uuid,
        alert_set_by,
        alert_level,
        alert_title,
        alert_message,
        alert_sort_position,
        alert_show_header,
        alert_processed,
        modified_date
    ) VALUES (
        alert_row.alert_uuid,
        alert_row.alert_host_uuid,
        alert_row.alert_set_by,
        alert_row.alert_level,
        alert_row.alert_title,
        alert_row.alert_message,
        alert_row.alert_sort_position,
        alert_row.alert_show_header,
        alert_row.alert_processed,
        alert_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_alerts() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'alerts'.
CREATE TRIGGER trigger_alerts
    AFTER INSERT OR UPDATE
    ON alerts
    FOR EACH ROW
    EXECUTE PROCEDURE history_alerts();

    -- This stores weighted health of nodes. Agents can set one or more health values. After a scan sweep
-- completes, ScanCore will sum these weights and the node with the *highest* value is considered the
-- *least* healthy and any servers on it will be migrated to the peer.
CREATE TABLE health (
    health_uuid             uuid           PRIMARY KEY,
    -- hosts -> host_uuid of the node or dashboard that this health came from.
    health_host_uuid        uuid           NOT NULL    REFERENCES hosts(host_uuid),
    -- The scan agent (or program name) setting this score.
    health_agent_name       text           NOT NULL,
    -- The name of the problem, as set by the agent.
    health_source_name      text           NOT NULL,
    -- The numerical weight of this alert. The higher this value, the more severe the health issue is.
    health_source_weight    numeric        NOT NULL,
    modified_date           timestamptz    NOT NULL
);
ALTER TABLE health OWNER TO admin;

-- History copy of 'health': the same columns plus a serial 'history_id'.
CREATE TABLE history.health (
    history_id              bigserial,
    health_uuid             uuid           NOT NULL,
    health_host_uuid        uuid           NOT NULL,
    health_agent_name       text           NOT NULL,
    health_source_name      text           NOT NULL,
    health_source_weight    numeric        NOT NULL,
    modified_date           timestamptz    NOT NULL
);
ALTER TABLE history.health OWNER TO admin;

-- Trigger function that copies the inserted/updated 'health' row into history.health.
CREATE FUNCTION history_health() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    health_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO health_row
      FROM health
     WHERE health_uuid = NEW.health_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.health (
        health_uuid,
        health_host_uuid,
        health_agent_name,
        health_source_name,
        health_source_weight,
        modified_date
    ) VALUES (
        health_row.health_uuid,
        health_row.health_host_uuid,
        health_row.health_agent_name,
        health_row.health_source_name,
        health_row.health_source_weight,
        health_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_health() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'health'.
CREATE TRIGGER trigger_health
    AFTER INSERT OR UPDATE
    ON health
    FOR EACH ROW
    EXECUTE PROCEDURE history_health();

-- Power state reported for a host and the UPS feeding it.
CREATE TABLE power (
    power_uuid                 uuid           PRIMARY KEY,
    -- hosts -> host_uuid of the node or dashboard that this power came from.
    power_host_uuid            uuid           NOT NULL    REFERENCES hosts(host_uuid),
    -- This is the 'upses' -> 'ups_uuid' of the UPS. This is used to map what UPSes are powering a given node.
    power_ups_uuid             uuid           NOT NULL    REFERENCES upses(ups_uuid),
    -- The name of the scan agent that wrote a given entry.
    power_agent_name           text           NOT NULL,
    -- TRUE == use "time_remaining" to determine if graceful power off is needed.
    -- FALSE == power loss NOT imminent, do not power off node.
    power_on_battery           boolean        NOT NULL,
    -- Should always be set, but not required *EXCEPT* when 'power_on_battery' is TRUE.
    power_seconds_left         numeric,
    -- Percentage charge in the UPS. Used to determine when the dashboard should boot the node after AC restore.
    power_charge_percentage    numeric,
    modified_date              timestamptz    NOT NULL
);
ALTER TABLE power OWNER TO admin;

-- History copy of 'power': the same columns plus a serial 'history_id'. Only 'modified_date' is mandatory.
CREATE TABLE history.power (
    history_id                 bigserial,
    power_uuid                 uuid,
    power_host_uuid            uuid,
    power_ups_uuid             uuid,
    power_agent_name           text,
    power_on_battery           boolean,
    power_seconds_left         numeric,
    power_charge_percentage    numeric,
    modified_date              timestamptz    NOT NULL
);
ALTER TABLE history.power OWNER TO admin;

-- Trigger function that copies the inserted/updated 'power' row into history.power.
CREATE FUNCTION history_power() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    power_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO power_row
      FROM power
     WHERE power_uuid = NEW.power_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.power (
        power_uuid,
        power_host_uuid,
        power_ups_uuid,
        power_agent_name,
        power_on_battery,
        power_seconds_left,
        power_charge_percentage,
        modified_date
    ) VALUES (
        power_row.power_uuid,
        power_row.power_host_uuid,
        power_row.power_ups_uuid,
        power_row.power_agent_name,
        power_row.power_on_battery,
        power_row.power_seconds_left,
        power_row.power_charge_percentage,
        power_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_power() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'power'.
CREATE TRIGGER trigger_power
    AFTER INSERT OR UPDATE
    ON power
    FOR EACH ROW
    EXECUTE PROCEDURE history_power();

-- This stores temperature information for a given host. ScanCore checks this data to decide if action needs
-- to be taken during a thermal event. On nodes, this is used to decide if a node should be shed or if an
-- Anvil! needs to be stopped entirely. On dashboards, this is used to check if/when it is safe to restart a
-- node that shut down because of a thermal event.
CREATE TABLE temperature (
    temperature_uuid           uuid           PRIMARY KEY,
    -- hosts -> host_uuid of the node or dashboard that this temperature came from.
    temperature_host_uuid      uuid           NOT NULL    REFERENCES hosts(host_uuid),
    -- The name of the agent that set the alert.
    temperature_agent_name     text           NOT NULL,
    -- The host (uuid) that the sensor was read from. This is important as ScanCore on a striker will read
    -- available thermal data from a node using its IPMI data.
    temperature_sensor_host    text           NOT NULL,
    -- The name of the sensor reporting the temperature.
    temperature_sensor_name    text           NOT NULL,
    -- The actual temperature, in Celsius.
    temperature_celsius        numeric        NOT NULL,
    -- A string representing the state of the sensor. Valid values are 'ok', 'warning', and 'critical'.
    temperature_state          text           NOT NULL,
    -- Indicates whether the temperature is 'nominal', 'high' or 'low'.
    temperature_is             text           NOT NULL,
    modified_date              timestamptz    NOT NULL
);
ALTER TABLE temperature OWNER TO admin;

-- History copy of 'temperature': the same columns plus a serial 'history_id'.
CREATE TABLE history.temperature (
    history_id                 bigserial,
    temperature_uuid           uuid           NOT NULL,
    temperature_host_uuid      uuid           NOT NULL,
    temperature_agent_name     text           NOT NULL,
    temperature_sensor_host    text           NOT NULL,
    temperature_sensor_name    text           NOT NULL,
    temperature_celsius        numeric        NOT NULL,
    temperature_state          text           NOT NULL,
    temperature_is             text           NOT NULL,
    modified_date              timestamptz    NOT NULL
);
ALTER TABLE history.temperature OWNER TO admin;

-- Trigger function that copies the inserted/updated 'temperature' row into history.temperature.
CREATE FUNCTION history_temperature() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    temperature_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO temperature_row
      FROM temperature
     WHERE temperature_uuid = NEW.temperature_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.temperature (
        temperature_uuid,
        temperature_host_uuid,
        temperature_agent_name,
        temperature_sensor_host,
        temperature_sensor_name,
        temperature_celsius,
        temperature_state,
        temperature_is,
        modified_date
    ) VALUES (
        temperature_row.temperature_uuid,
        temperature_row.temperature_host_uuid,
        temperature_row.temperature_agent_name,
        temperature_row.temperature_sensor_host,
        temperature_row.temperature_sensor_name,
        temperature_row.temperature_celsius,
        temperature_row.temperature_state,
        temperature_row.temperature_is,
        temperature_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_temperature() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'temperature'.
CREATE TRIGGER trigger_temperature
    AFTER INSERT OR UPDATE
    ON temperature
    FOR EACH ROW
    EXECUTE PROCEDURE history_temperature();

-- Remove the existing 'definitions' and 'servers' tables together with their history copies and history
-- trigger functions. 'servers' is created again below. Nothing here copies the existing rows, so any data
-- in these tables is lost when this runs.
-- CASCADE on the functions also drops objects that depend on them (such as the triggers that call them).
-- NOTE(review): 'definitions' is dropped before 'servers', presumably because it references 'servers';
-- keep this order unless that is confirmed not to matter.
DROP FUNCTION history_definitions() CASCADE;
DROP TABLE history.definitions;
DROP TABLE definitions;
DROP FUNCTION history_servers() CASCADE;
DROP TABLE history.servers;
DROP TABLE servers;

-- One row per server. Tracks where the server runs, its boot ordering, its migration behaviour and its
-- RAM usage.
CREATE TABLE servers (
    server_uuid                        uuid                        not null    primary key,
    server_name                        text                        not null,                     -- This is the server's name. It can change without re-uploading the server.
    server_anvil_uuid                  uuid                        not null,                     -- This is the Anvil! system that the server lives on. It can move to another Anvil!, so this can change.
    server_user_stop                   boolean                     not null    default FALSE,    -- When set, the server was stopped by a user. The Anvil! will not start a server that has been cleanly stopped.
    server_start_after_server_uuid     uuid,                                                     -- This can be the server_uuid of another server. If set, this server will boot 'server_start_delay' seconds after the referenced server boots. A value of '00000000-0000-0000-0000-000000000000' will tell 'anvil-safe-start' to not boot the server at all. If a server is set not to start, any dependent servers will also stay off.
    server_start_delay                 integer                     not null    default 0,        -- See above.
    server_host_uuid                   uuid,                                                     -- This is the current hosts -> host_uuid for this server. If the server is off, this will be blank (NULL), so the column must be nullable.
    server_state                       text                        not null,                     -- This is the current state of this server, as reported by 'virsh list --all' (see: man virsh -> GENERIC COMMANDS -> --list)
    server_live_migration              boolean                     not null    default TRUE,     -- When false, servers will be frozen for a migration, instead of being migrated while the server is running. During a cold migration, the server will be unresponsive, so connections to it could time out. However, by being frozen the migration will complete faster.
    server_pre_migration_file_uuid     uuid,                                                     -- This is set to the files -> file_uuid of a script to run BEFORE migrating a server. If the file isn't found or can't run, the script is ignored.
    server_pre_migration_arguments     text                        not null,                     -- These are arguments to pass to the pre-migration script
    server_post_migration_file_uuid    uuid,                                                     -- This is set to the files -> file_uuid of a script to run AFTER migrating a server. If the file isn't found or can't run, the script is ignored.
    server_post_migration_arguments    text                        not null,                     -- These are arguments to pass to the post-migration script
    server_ram_in_use                  numeric                     not null,                     -- This is the amount of RAM currently used by the server. If the server is off, then this is the amount of RAM last used when the server was running.
    server_configured_ram              numeric                     not null,                     -- This is the amount of RAM allocated to the server in the on-disk definition file. This should always match the table above, but allows us to track when a user manually updated the allocated RAM in the on-disk definition, but that hasn't yet been picked up by the server
    server_updated_by_user             numeric                     not null,                     -- This is set to a unix timestamp when the user last updated the definition (via striker). When set, scan-server will check this value against the age of the definition file on disk. If this is newer, the on-disk definition will be updated. On the host with the server (if any), the new definition will be loaded into virsh as well.
    server_boot_time                   numeric                     not null,                     -- This is the unix time (since epoch) when the server booted. It is calculated by checking the 'ps -p <pid> -o etimes=' when a server is seen to be running when it had been last seen as off. If a server that had been running is seen to be off, this is set back to 0.
    modified_date                      timestamp with time zone    not null,

    FOREIGN KEY(server_anvil_uuid)               REFERENCES anvils(anvil_uuid),
    -- NOTE(review): this constraint only accepts the all-zero "do not boot" UUID described on
    -- 'server_start_after_server_uuid' if a server row with that server_uuid exists - confirm how the
    -- sentinel value is meant to be stored.
    FOREIGN KEY(server_start_after_server_uuid)  REFERENCES servers(server_uuid),
    -- A NULL 'server_host_uuid' (server is off) is not checked by this constraint.
    FOREIGN KEY(server_host_uuid)                REFERENCES hosts(host_uuid),
    FOREIGN KEY(server_pre_migration_file_uuid)  REFERENCES files(file_uuid),
    FOREIGN KEY(server_post_migration_file_uuid) REFERENCES files(file_uuid)
);
ALTER TABLE servers OWNER TO admin;

-- History copy of 'servers': the same columns plus a serial 'history_id'. Only 'modified_date' is mandatory.
CREATE TABLE history.servers (
    history_id                         bigserial,
    server_uuid                        uuid,
    server_name                        text,
    server_anvil_uuid                  uuid,
    server_user_stop                   boolean,
    server_start_after_server_uuid     uuid,
    server_start_delay                 integer,
    server_host_uuid                   uuid,
    server_state                       text,
    server_live_migration              boolean,
    server_pre_migration_file_uuid     uuid,
    server_pre_migration_arguments     text,
    server_post_migration_file_uuid    uuid,
    server_post_migration_arguments    text,
    server_ram_in_use                  numeric,
    server_configured_ram              numeric,
    server_updated_by_user             numeric,
    server_boot_time                   numeric,
    modified_date                      timestamptz    NOT NULL
);
ALTER TABLE history.servers OWNER TO admin;

-- Trigger function that copies the inserted/updated 'servers' row into history.servers.
CREATE FUNCTION history_servers() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    server_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO server_row
      FROM servers
     WHERE server_uuid = NEW.server_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.servers (
        server_uuid,
        server_name,
        server_anvil_uuid,
        server_user_stop,
        server_start_after_server_uuid,
        server_start_delay,
        server_host_uuid,
        server_state,
        server_live_migration,
        server_pre_migration_file_uuid,
        server_pre_migration_arguments,
        server_post_migration_file_uuid,
        server_post_migration_arguments,
        server_ram_in_use,
        server_configured_ram,
        server_updated_by_user,
        server_boot_time,
        modified_date
    ) VALUES (
        server_row.server_uuid,
        server_row.server_name,
        server_row.server_anvil_uuid,
        server_row.server_user_stop,
        server_row.server_start_after_server_uuid,
        server_row.server_start_delay,
        server_row.server_host_uuid,
        server_row.server_state,
        server_row.server_live_migration,
        server_row.server_pre_migration_file_uuid,
        server_row.server_pre_migration_arguments,
        server_row.server_post_migration_file_uuid,
        server_row.server_post_migration_arguments,
        server_row.server_ram_in_use,
        server_row.server_configured_ram,
        server_row.server_updated_by_user,
        server_row.server_boot_time,
        server_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_servers() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'servers'.
CREATE TRIGGER trigger_servers
    AFTER INSERT OR UPDATE
    ON servers
    FOR EACH ROW
    EXECUTE PROCEDURE history_servers();


-- This stores the XML definition for a server. Whenever a server_definition is found missing on a node or
-- DR host, it will be rewritten from here. If this copy changes, it will be updated on the hosts.
CREATE TABLE server_definitions (
    server_definition_uuid           uuid           NOT NULL    PRIMARY KEY,
    -- The servers -> server_uuid of the server.
    server_definition_server_uuid    uuid           NOT NULL    REFERENCES servers(server_uuid),
    -- The XML body.
    server_definition_xml            text           NOT NULL,
    modified_date                    timestamptz    NOT NULL
);
ALTER TABLE server_definitions OWNER TO admin;

-- History copy of 'server_definitions': the same columns plus a serial 'history_id'. Only 'modified_date'
-- is mandatory.
CREATE TABLE history.server_definitions (
    history_id                       bigserial,
    server_definition_uuid           uuid,
    server_definition_server_uuid    uuid,
    server_definition_xml            text,
    modified_date                    timestamptz    NOT NULL
);
ALTER TABLE history.server_definitions OWNER TO admin;

-- Trigger function that copies the inserted/updated 'server_definitions' row into
-- history.server_definitions.
CREATE FUNCTION history_server_definitions() RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    definition_row RECORD;
BEGIN
    -- Read the row that fired the trigger back from the live table.
    SELECT *
      INTO definition_row
      FROM server_definitions
     WHERE server_definition_uuid = NEW.server_definition_uuid;

    -- Append a copy of that row to the history schema.
    INSERT INTO history.server_definitions (
        server_definition_uuid,
        server_definition_server_uuid,
        server_definition_xml,
        modified_date
    ) VALUES (
        definition_row.server_definition_uuid,
        definition_row.server_definition_server_uuid,
        definition_row.server_definition_xml,
        definition_row.modified_date
    );

    RETURN NULL;
END;
$$;

ALTER FUNCTION history_server_definitions() OWNER TO admin;

-- Fire the function once per row after every INSERT or UPDATE on 'server_definitions'.
CREATE TRIGGER trigger_server_definitions
    AFTER INSERT OR UPDATE
    ON server_definitions
    FOR EACH ROW
    EXECUTE PROCEDURE history_server_definitions();


-- Closes the transaction opened at the top of this migration.
COMMIT;

============
From:     test-alert@alert.alteeve.com
To:       Madison Kelly <debug@alteeve.com>
Subject:  [ ScanCore ] - Test email
Reply-To:

This is a test alert.

============

DISABLE KSM!
- https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/virtualization_tuning_and_optimization_guide/chap-ksm

NEXT; -

RHEL 8 package changes:
chrony   replaces ntp
cockpit  replaces virt-manager (deprecated)
e1000e   replaces e1000 driver
tmux     replaces screen
iproute2 replaces bridge-utils (See "ip link" and man bridge.)


==] UEFI Setup [======================================================================
ignoredisk --only-use=vdb,vda
clearpart --none --initlabel
part raid.312 --fstype="mdmember" --ondisk=vda --size=19966
part raid.293 --fstype="mdmember" --ondisk=vdb --size=512
part raid.319 --fstype="mdmember" --ondisk=vdb --size=19966
part raid.286 --fstype="mdmember" --ondisk=vda --size=512
raid pv.326 --device=pv1 --fstype="lvmpv" --level=RAID1 raid.312 raid.319
raid /boot/efi --device=efi --fstype="efi" --level=RAID1 --fsoptions="umask=0077,shortname=winnt" --label=efi raid.286 raid.293
volgroup striker_vg0 --pesize=4096 pv.326
logvol swap --fstype="swap" --size=4096 --name=lv_swap --vgname=striker_vg0
logvol / --fstype="xfs" --size=15852 --label="lv_root" --name=lv_root --vgname=striker_vg0
======================================================================================

DOCS; -
- Explanation of 'comps.xml' (package grouping) - https://pagure.io/fedora-comps
- Firewalld
  - https://www.digitalocean.com/community/tutorials/how-to-set-up-a-firewall-using-firewalld-on-centos-7
- PXE;
  - https://docs.fedoraproject.org/en-US/fedora/f28/install-guide/advanced/Network_based_Installations/
  - https://docs.fedoraproject.org/en-US/Fedora/26/html/Installation_Guide/chap-pxe-server-setup.html
  - UEFI PXE notes - https://www.syslinux.org/wiki/index.php?title=PXELINUX#UEFI
- How to write a NetworkManager dispatcher script to apply ethtool commands? - https://access.redhat.com/solutions/2841131
- Setup nodes to log to striker? - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-configuring_netconsole
- Pacemaker can be monitored via SNMP - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/high_availability_add-on_reference/s1-snmpandpacemaker-HAAR
- corosync.conf - https://access.redhat.com/articles/3185291

====

Network planning;

 10.x.y.z / 255.255.0.0
 10.x.y.z / 255.255.0.0

x = Network;
    - BCN = 200 + network
            ie: BCN1 = 10.201.y.z
                BCN2 = 10.202.y.z
    -  SN = 100 + network
            ie: SN1 = 10.101.y.z
                SN2 = 10.102.y.z

y = Device Type.
    Foundation Pack;
    1. Switches
    2. PDUs
    3. UPSes
    4. Strikers
    5. Striker IPMI (BCN only)

    Anvil! systems;
    1st - 10 = Node IP
          11 = Node IPMI
    2nd - 12 = Node IP
          13 = Node IPMI
    3rd - 14 = Node IP
          15 = Node IPMI
    N...

z = Device Sequence
    - Foundation pack devices are simple sequence
    - Anvils; .1 = node 1, .2 = node 2, .3 = dr

====

RHEL 8 Firewall
- https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8-beta/html/configuring_and_managing_networking/assembly_using-firewalls_configuring-networking-with-gnome-gui

=============================

Ports we care about

Proto	Number		Used by		Nets		Description
TCP	2224		pcsd 		bcn		It is crucial to open port 2224 in such a way that pcs from any node can talk to all nodes in the cluster, including itself.
UDP	5404		corosync	bcn		Required on corosync nodes if corosync is configured for multicast UDP
UDP	5405		corosync	bcn		Required on all corosync nodes (needed by corosync)
TCP	7788+		drbd		sn		1 port per resource
TCP	49152-49215	virsh		bcn		live migration - migration_port_min and migration_port_max attributes in the /etc/libvirt/qemu.conf

NOTE: DHCP listens to raw sockets and ignores firewalld rules. We need to stop dhcpd directly - https://kb.isc.org/docs/aa-00378

* After all changes;
firewall-cmd --zone=public --add-port=49152-49215/tcp --permanent
firewall-cmd --reload

- Paths
If we want to create services or helpers later, look under - /usr/lib/firewalld/
Core firewalld configs, including defaults zones, etc      - /etc/firewalld/

* Zones are meant to deal with dynamic environments and aren't that useful in mostly static server environments
* Use 'firewall-cmd' WITHOUT '--permanent' for things like enabling the VNC port for a server. Use '--permanent' for everything else.
====

Striker as PXE server
====


# Bootloader for BIOS
# NOTE: '$(anvil source)' below stands for the path to the anvil source tree; substitute the real path.
OS="fedora28"
# -p so that re-running these steps does not fail when the directory already exists.
mkdir -p /var/lib/tftpboot/
cp $(anvil source)/pxe/tftpboot/bios/* /var/lib/tftpboot/
chmod 755 /var/lib/tftpboot/*

# Bootloader for UEFI
# The uefi/ subdirectory has to exist before files can be copied into it.
mkdir -p /var/lib/tftpboot/uefi/
cp $(anvil source)/pxe/tftpboot/uefi/* /var/lib/tftpboot/uefi/
chmod 755 /var/lib/tftpboot/uefi/*

# Copy kernel images for tftpboot downloads
mkdir -p "/var/lib/tftpboot/${OS}"
cp "/lib/modules/$(uname -r)/vmlinuz" "/var/lib/tftpboot/${OS}/"
mkinitrd "/var/lib/tftpboot/${OS}/initrd.img" "$(uname -r)"

# Configs from anvil source
rsync -av pxe/tftpboot/pxelinux.cfg/default root@f28-striker01:/var/lib/tftpboot/pxelinux.cfg/
rsync -av pxe/tftpboot/pxelinux/uefi        root@f28-striker01:/var/lib/tftpboot/pxelinux/

==== UEFI boot crash
>>Start PXE over IPv4.
  Station IP address is 10.1.14.186

  Server IP address is 10.1.4.1
  NBP filename is uefi/shim.efi
  NBP filesize is 1210776 Bytes
 Downloading NBP file...

  NBP file downloaded successfully.
Fetching Netboot Image
!!!! X64 Exception Type - 0D(#GP - General Protection)  CPU Apic ID - 00000000 !!!!
ExceptionData - 0000000000000000
RIP  - 000000007FF8E976, CS  - 0000000000000038, RFLAGS - 0000000000210202
RAX  - 0000000000000001, RCX - 0000000000000010, RDX - AFAFAFAFAFAFAFA7
RBX  - 0000000000000020, RSP - 000000007FF7E6B0, RBP - 000000007ED11F18
RSI  - AFAFAFAFAFAFAFAF, RDI - 000000007FFA1720
R8   - 0000000000000000, R9  - 0000000000000028, R10 - 0000000000000020
R11  - 0000000000000002, R12 - 000000007ECE3798, R13 - 000000007ECE3C18
R14  - 000000007FF7E788, R15 - 000000007ECE37D8
DS   - 0000000000000030, ES  - 0000000000000030, FS  - 0000000000000030
GS   - 0000000000000030, SS  - 0000000000000030
CR0  - 0000000080010033, CR2 - 0000000000000000, CR3 - 000000007FC01000
CR4  - 0000000000000668, CR8 - 0000000000000000
DR0  - 0000000000000000, DR1 - 0000000000000000, DR2 - 0000000000000000
DR3  - 0000000000000000, DR6 - 00000000FFFF0FF0, DR7 - 0000000000000400
GDTR - 000000007FBEE698 0000000000000047, LDTR - 0000000000000000
IDTR - 000000007F5B5018 0000000000000FFF,   TR - 0000000000000000
FXSAVE_STATE - 000000007FF7E310
!!!! Find image based on IP(0x7FF8E976) /builddir/build/BUILD/tianocore-edk2-cb5f4f45ce/Build/OvmfX64/DEBUG_GCC5/X64/MdeModulePkg/Core/Dxe/DxeMain/DEBUG/DxeCore.dll (ImageBase=000000007FF80000, EntryPoint=000000007FF98DB1) !!!!

====

DB stuff;

Dump;
su - postgres -c "pg_dump anvil" > /anvil.out

Drop;
su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil"

Reload the DB;
su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil < /anvil.out" && su - postgres -c "psql anvil"


Changes made using tools such as nmcli do not require a reload but do require the associated interface to be put down and then up again. That can be done by using commands in the following format:
* nmcli dev disconnect interface-name
Followed by:
* nmcli con up interface-name

NOTE: RHEL doesn't support direct-cabled bonds - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/ch-configure_network_bonding

ifcfg-X config Notes - /usr/share/doc/initscripts-*/sysconfig.txt (Look for the sections describing files /etc/sysconfig/network and /etc/sysconfig/network-scripts/ifcfg-<interface-name>);
                     - man 5 nm-settings-ifcfg-rh
                     - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-Using_Channel_Bonding#s3-modules-bonding-directives
                     - /usr/share/doc/kernel-doc-*/Documentation/networking/bonding.txt
iface
* PREFIXx overrules NETMASKx. Use PREFIXx, not NETMASKx.
* The 'x' suffix for PREFIX, NETMASK, etc. starts at 0 and must count up by 1 at a time.
* ZONE will be useful for the firewall stuff later.
* ETHTOOL_OPTS is deprecated, replaced by using udev rules
* initscripts interpret PEERDNS=no to mean "never touch resolv.conf". NetworkManager interprets it to say "never add automatic (DHCP, PPP, VPN, etc.) nameservers to resolv.conf".
Bond
* resend_igmp & num_unsol_na={1~255} may help if a switch is slow to notice traffic has moved to the new interface. Default is 1. Each update is sent 200ms apart.
* Bridged interfaces should use BRIDGE_UUID="", _not_ BRIDGE="". The former causes the latter to be ignored and the latter is only used for possible compatibility reasons.
Bridge
* STP=no is default, we'll test 'yes'.
* DOMAIN="<client_domain>"

=======
virt-manager stores information in dconf-editor -> /org/virt-manager/virt-manager/connections ($HOME/.config/dconf/user)

==== dconf read /org/virt-manager/virt-manager/connections/uris
['qemu+ssh://root@localhost/system', 'qemu+ssh://root@wp-a01n02.remote/system', 'qemu+ssh://root@an-nas02.kw01.alteeve.ca/system', 'qemu+ssh://root@hb-a01n01.remote/system', 'qemu+ssh://root@hb-a01n02.remote/system', 'qemu:///system']
==== dconf read /org/virt-manager/virt-manager/connections/autoconnect
['qemu+ssh://root@localhost/system']
====

# Web - TODO: Setup to auto-use "Let's Encrypt", but make sure we have an offline fall-back

# SELinux
restorecon -rv /var/www

=============================================================

* Network;
** {bc,if,s}nX_{link,bond,bridge}Y naming
** firewall; - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/high_availability_add-on_reference/s1-firewalls-haar
firewall-cmd --permanent --add-service=high-availability
firewall-cmd --add-service=high-availability
firewall-cmd --reload


* Cluster Config;
==== Both nodes
echo Initial1 | passwd hacluster --stdin
systemctl start pcsd.service
systemctl enable pcsd.service
systemctl disable libvirtd.service
systemctl stop libvirtd.service
==== One node
pcs host auth el8-a01n01 el8-a01n02 -u hacluster -p "secret"

### VMs
pcs cluster setup m3-anvil-01 el8-a01n01 el8-a01n02
pcs cluster start --all
pcs stonith create virsh_node1 fence_virsh pcmk_host_list="el8-a01n01" ipaddr="192.168.122.1" passwd="secret" login="root" delay="15" port="el8-a01n01" op monitor interval="60"
pcs stonith create virsh_node2 fence_virsh pcmk_host_list="el8-a01n02" ipaddr="192.168.122.1" passwd="secret" login="root" port="el8-a01n02" op monitor interval="60"

### Real iron.
pcs stonith create ipmilan_node1 fence_ipmilan pcmk_host_list="mk-a02n01" ipaddr="10.201.13.1" password="another secret p" username="admin" delay="15"  op monitor interval="60"
pcs stonith level add 1 mk-a02n01 ipmilan_node1

pcs stonith create ipmilan_node2 fence_ipmilan pcmk_host_list="mk-a02n02" ipaddr="10.201.13.2" password="another secret p" username="admin" op monitor interval="60"
pcs stonith level add 1 mk-a02n02 ipmilan_node2

pcs stonith create apc_snmp_node1_psu1 fence_apc_snmp pcmk_host_list="mk-a02n01" pcmk_off_action="reboot" ip="10.201.2.3" port="3" power_wait="5" op monitor interval="60"
pcs stonith create apc_snmp_node1_psu2 fence_apc_snmp pcmk_host_list="mk-a02n01" pcmk_off_action="reboot" ip="10.201.2.4" port="3" power_wait="5" op monitor interval="60"
pcs stonith level add 2 mk-a02n01 apc_snmp_node1_psu1,apc_snmp_node1_psu2

pcs stonith create apc_snmp_node2_psu1 fence_apc_snmp pcmk_host_list="mk-a02n02" pcmk_off_action="reboot" ip="10.201.2.3" port="4" power_wait="5" op monitor interval="60"
pcs stonith create apc_snmp_node2_psu2 fence_apc_snmp pcmk_host_list="mk-a02n02" pcmk_off_action="reboot" ip="10.201.2.4" port="4" power_wait="5" op monitor interval="60"
pcs stonith level add 2 mk-a02n02 apc_snmp_node2_psu1,apc_snmp_node2_psu2

pcs stonith create delay_node1 fence_delay pcmk_host_list="mk-a02n01" wait="60" op monitor interval="60"
pcs stonith level add 3 mk-a02n01 delay_node1

pcs stonith create delay_node2 fence_delay pcmk_host_list="mk-a02n02" wait="60" op monitor interval="60"
pcs stonith level add 3 mk-a02n02 delay_node2

# Either case
pcs property set stonith-max-attempts=INFINITY
pcs property set stonith-enabled=true

### TODO: Look into 'priority-fencing-delay'


# Create a new server resource, stopped, create the location constraint (higher == preferred), then start.
pcs resource create srv07-el6 ocf:alteeve:server name="srv07-el6" meta allow-migrate="true" target-role="stopped" op monitor interval="60" on-fail="block" stop timeout="INFINITY" migrate_to timeout="INFINITY"
pcs constraint location srv07-el6 prefers mk-a02n01=200 mk-a02n02=100
pcs resource enable srv07-el6
- or -
pcs resource update srv07-el6 ocf:alteeve:server name="srv07-el6" meta allow-migrate="true" migrate_to="INFINITY" stop="INFINITY" op monitor interval="60" on-fail="block"

# Test
stonith_admin --fence el8-a01n02 --verbose; crm_error $?


stonith-max-attempts=INFINITY
cluster-recheck-interval puts an upper bound on the "i give up" time

==== DRBD notes

* Resource names can contain any US-ASCII character, except for whitespace
* A resource is a single replication stream for 1 or more volumes, max 65,535 vols per resource
* DRBD does, however, ship with an LVM integration facility that automates the creation of LVM snapshots immediately before synchronization. This ensures that a consistent copy of the data is always available on the peer, even while synchronization is running. See Using automated LVM snapshots during DRBD synchronization for details on using this facility.
** https://docs.linbit.com/docs/users-guide-9.0/#s-lvm-snapshots
* Checksum-based synchronization computes a block's hash on source and target and skips if matching, possibly making resync much faster for blocks rewritten with the same data, but at the cost of CPU. Make this a user-configurable option under the advanced tab.
* Suspended replication allows congested replication links to suspend replication, leaving the peer in a consistent state, but allowing the primary to "pull ahead". When the congestion passes, the delta resyncs. Make this a user-configurable option with scary warnings.
* Online verification can (should?) be run periodically on the server host (verification source will overwrite deltas on the verification target). Perhaps schedule to run once/month? Do resources sequentially as this places a CPU load on the nodes.
* Replication traffic integrity checking uses a given available kernel crypto to verify data integrity on transmission to the peer. If the replicated block can not be verified against the digest, the connection is dropped and immediately re-established; because of the bitmap the typical result is a retransmission.
** Make an option in the advanced tab. Test to see overhead this adds. Choose the lowest overhead algo (within reason)
* Support for disk flushes might be something we want to disable, as it seems to force write-through even with a functioning FBWC/BBU. Need to test.
* Note; "Inconsistent" is almost always useless. "Consistent" and "Outdated" are able to be used safely, just without whatever happened on the peer after.
* Truck based replication, also known as disk shipping, is a means of preseeding a remote site with data to be replicated, by physically shipping storage media to the remote site.
* Make sure that selinux doesn't block DRBD comms over the SN
* See "5.15.1. Growing on-line" for growing a DRBD resource
** Shrinking online is ONLY possible if the metadata is external. Worth creating *_md LVs? Offline requires backing up and restoring the MD

Provisioning a server will need to:
* Create the LVs
* Open up the DRBD ports
* Create the DRBD resource(s); Find the lowest free rX.res, create it locally and on the peer (if up),

firewall-cmd --zone=public --permanent --add-port=7788-7790/tcp
firewall-cmd --reload

* Provision the server via virt-install
* push the new XML to striker such that the peer's anvil daemon picks it up and writes it out.

[root@el8-a01n01 drbd.d]# drbdsetup status r0 --verbose --statistics
r0 node-id:1 role:Primary suspended:no
    write-ordering:flush
  volume:0 minor:0 disk:UpToDate quorum:yes
      size:10485404 read:9682852 written:0 al-writes:0 bm-writes:0 upper-pending:0 lower-pending:0 al-suspended:no blocked:no
  el8-a01n02.alteeve.com node-id:0 connection:Connected role:Secondary congested:no
    volume:0 replication:SyncSource peer-disk:Inconsistent done:92.29 resync-suspended:no
        received:0 sent:9679140 out-of-sync:808144 pending:6 unacked:3

[root@el8-a01n02 ~]# cat /sys/kernel/debug/drbd/resources/r0/connections/el8-a01n01.alteeve.com/0/proc_drbd
 0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r-----
    ns:24360 nr:10485404 dw:10485404 dr:25420 al:0 bm:0 lo:0 pe:[0;0] ua:0 ap:[0;0] ep:1 wo:2 oos:10461044
	[>....................] sync'ed:  0.3% (10212/10236)M
	finish: 0:50:01 speed: 3,480 (5,020 -- 3,480) K/sec
	 99% sector pos: 20970808/20970808
	resync: used:0/61 hits:557 misses:2 starving:0 locked:0 changed:1
	act_log: used:0/1237 hits:0 misses:0 starving:0 locked:0 changed:0
	blocked on activity log: 0

[root@el8-a01n02 ~]# drbdadm primary r0
r0: State change failed: (-1) Multiple primaries not allowed by config
Command 'drbdsetup primary r0' terminated with exit code 11

[root@el8-a01n02 ~]# drbdadm net-options --allow-two-primaries=yes r0
[root@el8-a01n02 ~]# drbdadm net-options --allow-two-primaries=no r0

drbdsetup show all
drbdsetup show all --show-defaults

== virt-install stuff
* Get a list of --os-variants: 'osinfo-query os'
* virt-install --print-xml (or --transient)
* Migrate;
# For all resources under the server;
#drbdadm net-options r0 --allow-two-primaries=yes

drbdsetup net-options srv01-c7_0 2 --_name=m3-a02n01.alteeve.com --csums-alg=md5 --data-integrity-alg=md5 --after-sb-0pri=discard-zero-changes --after-sb-1pri=discard-secondary --after-sb-2pri=disconnect --protocol=C --fencing=resource-and-stonith --allow-two-primaries=yes
drbdsetup net-options srv01-c7_0 2 --_name=m3-a02n01.alteeve.com --csums-alg=md5 --data-integrity-alg=md5 --after-sb-0pri=discard-zero-changes --after-sb-1pri=discard-secondary --after-sb-2pri=disconnect --protocol=C --fencing=resource-and-stonith --allow-two-primaries=yes

# Migrate:

virsh -c qemu+ssh://root@m3-a02n02.alteeve.com/system list

virsh migrate --unsafe --undefinesource --live srv01-c7 qemu+ssh://m3-a02n01.alteeve.com/system
virsh -c qemu+ssh://root@m3-a02n02.alteeve.com/system  migrate --undefinesource --live srv01-c7 qemu+ssh://m3-a02n01.alteeve.com/system

# Again for all resource under the server;
drbdadm net-options r0 --allow-two-primaries=no

drbdsetup net-options <resource> <target_node_id> --_name=<target_node_name> --allow-two-primaries=yes
virsh migrate --undefinesource --live <server> qemu+ssh://<target_node>/system
drbdsetup net-options <resource> <target_node_id> --_name=<target_node_name> --allow-two-primaries=no

pcs constraint list --full
Location Constraints:
  Resource: srv01-c7
    Enabled on: m3-a02n02.alteeve.com (score:50) (id:location-srv01-c7-m3-a02n02.alteeve.com-50)
pcs constraint remove location-srv01-c7-m3-a02n02.alteeve.com-50


Set to 90% of BCN bandwidth
       migrate-setspeed domain bandwidth
           Set the maximum migration bandwidth (in MiB/s) for a domain which is being migrated to another host. bandwidth is interpreted as an
           unsigned long long value. Specifying a negative value results in an essentially unlimited value being provided to the hypervisor. The
           hypervisor can choose whether to reject the value or convert it to the maximum value allowed.

       migrate-getspeed domain
           Get the maximum migration bandwidth (in MiB/s) for a domain.


# Provision servers
mkdir /mnt/anvil/{provision,files,archive,definitions}


== Resource Agent; https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc

* A resource agent receives all configuration information about the resource it manages via environment variables. The names of these environment variables are always the name of the resource parameter, prefixed with OCF_RESKEY_. For example, if the resource has an ip parameter set to 192.168.1.1, then the resource agent will have access to an environment variable OCF_RESKEY_ip holding that value.


===

When stopping a server;
14:03 < lge> "on-fail: block"
14:03 < lge> is per operation type.
14:08 < lge> anyways, you can also "on-fail: retry"

OK, set the stop timeout to 60, set 'on-fail: block' and set the failure-timeout to 60 and see how pacemaker reacts.
failure-timeout

===

Migrate servers;

- Let ScanCore set 'node-health' attribute (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-node-health)
- Set 'migration-limit' to '1' to enforce serial live migration (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-cluster-options).

Migrate a single server by setting a location constraint against the node we want the VM off of.
- If anything goes wrong, the server will enter a blocked state in pacemaker.
- Recovery needs to be 'unmanage -> clean' to avoid a stop call.
11:57 <@kgaillot> for your design, that sounds right. between cleanup and manage, i'd make sure there was a PE run without any pending actions blocked by the unmanaging -- you can either look at the logs on the DC, run "crm_simulate -SL", or just check the status for a bit
11:58 <@kgaillot> you can play around with it by putting a higher preference on the to-be-cleaned node, to make sure it *does* move when you re-manage. that way you can see what logs/simulate/status look like

12:07 <@kgaillot> i'm thinking if you do crm_resource --reprobe instead of cleanup in the above sequence, that should prevent anything unexpected
12:07 <@kgaillot> unmanage -> adjust preferences if needed -> reprobe resource -> wait for probe results to come back in, and if status looks good -> re-manage
12:08 <@kgaillot> the reprobe will wipe the entire resource history and fail counts for the resource, causing pacemaker to recheck the current status on all nodes. if the status then shows the resource running where you expect/want it, with no errors, then it's not going to do anything further
12:09 <@kgaillot> (in 2.0, cleanup only erases the history where the resource has failed, while reprobe erases the history regardless)
12:13 <@kgaillot> if there are no failures in the resource history, there should be no risk of a full stop. if there is no resource history at all, then after reprobe, there should be no risk of any actions (assuming you've set up location preferences and stickiness how you want them)

Recover from a failed migration;

reset location to prefer current host -> unmanage resource -> cleanup resource -> manage resource

(running on node 2, so re-add location constraint - basically, make sure location constraint favours current host)


https://pykickstart.readthedocs.io/en/latest/kickstart-docs.html#chapter-1-introduction

==== Sample kickstart for Fedora28 netinstall
#version=DEVEL
ignoredisk --only-use=vda
# Partition clearing information
clearpart --none --initlabel
# Use graphical install
graphical
# Use network installation
url --url="http://10.1.4.1/rhel8/x86_64/iso/"
# Keyboard layouts
keyboard --vckeymap=us --xlayouts='us'
# System language
lang en_CA.UTF-8

# Network information
network  --bootproto=dhcp --device=ens3 --ipv6=auto --activate
network  --hostname=localhost.localdomain
# Root password
rootpw --iscrypted $6$fyAht.3wBVlRGgqG$5dqIv2NrBD87uA51fxuoic/t2G93pXPUjVlh27Avg20ZGY409SK8cMVgABswF.krJSVIyoHfIChXNfpP/qTjI1
# Run the Setup Agent on first boot
firstboot --enable
# Do not configure the X Window System
skipx
# System services
services --enabled="chronyd"
# System timezone
timezone Etc/GMT --isUtc
# System bootloader configuration
bootloader --location=mbr --boot-drive=vda

%packages
@^server-product-environment

%end

%addon com_redhat_kdump --disable --reserve-mb='128'

%end

%anaconda
pwpolicy root --minlen=6 --minquality=1 --notstrict --nochanges --notempty
pwpolicy user --minlen=6 --minquality=1 --notstrict --nochanges --emptyok
pwpolicy luks --minlen=6 --minquality=1 --notstrict --nochanges --notempty
%end
====

==== M3 Striker Kickstart
# Section 1
# Command section – Refer to Chapter 2 for a list of kickstart options. You must include the required options.


### NOTE: The %packages, %pre, %pre-install, %post, %onerror, and %traceback sections are all required to be closed with %end
# Section 2
# The %packages section – Refer to Chapter 3 for details.


# Section 3:
# The %pre, %pre-install, %post, %onerror, and %traceback sections – These sections can be in any order and are not required. Refer to Chapter 4, Chapter 5, and Chapter 6 for details.
====

  0 root@pulsar:/var/lib/libvirt/images# dev_PATH=$(udevadm info /dev/sdb | grep -e ID_PATH=)
  0 root@pulsar:/var/lib/libvirt/images# if  [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi
USB drive
  0 root@pulsar:/var/lib/libvirt/images# dev_PATH=$(udevadm info /dev/sda | grep -e ID_PATH=)
  0 root@pulsar:/var/lib/libvirt/images# if  [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi
SATA drive
  0 root@pulsar:/var/lib/libvirt/images# dev_PATH=$(udevadm info /dev/nvme0n1 | grep -e ID_PATH=)
  0 root@pulsar:/var/lib/libvirt/images# if  [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi
NVMe drive
[root@localhost ~]# dev_PATH=$(udevadm info /dev/sda | grep -e ID_PATH=)
[root@localhost ~]# if  [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi
SCSI drive


0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/model
Flash Disk
  0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/model
SanDisk SDSSDXPS
  0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/model
INTEL SSDPEKKW512G7

### Stuff only NVMe has
 1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/subsysnqn
nqn.2014.08.org.nvmexpress:80868086BTPY63650FPG512F    INTEL SSDPEKKW512G7
  0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/subsysnqn
cat: /sys/class/block/sda/device/subsysnqn: No such file or directory
  1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/subsysnqn
cat: /sys/class/block/sdb/device/subsysnqn: No such file or directory

  0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/serial
BTPY63650FPG512F
  0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/serial
cat: /sys/class/block/sda/device/serial: No such file or directory
  1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/serial
cat: /sys/class/block/sdb/device/serial: No such file or directory

  1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/transport
pcie
  0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/transport
cat: /sys/class/block/sda/device/transport: No such file or directory
  1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/transport
cat: /sys/class/block/sdb/device/transport: No such file or directory


Disk size:
/sys/class/block/sda/size * <block size>

=====
man pages

1   Executable programs or shell commands
5   File formats and conventions eg /etc/passwd
7   Miscellaneous (including macro packages and conventions), e.g. man(7), groff(7)
8   System administration commands (usually only for root)

A manual page consists of several sections.

Conventional section names include NAME, SYNOPSIS, CONFIGURATION, DESCRIPTION, OPTIONS, EXIT STATUS, RETURN VALUE, ERRORS, ENVIRONMENT, FILES, VERSIONS, CONFORMING TO, NOTES, BUGS, EXAMPLE, AUTHORS, and SEE ALSO.

The following conventions apply to the SYNOPSIS section and can be used as a guide in other sections.

bold text          type exactly as shown.
italic text        replace with appropriate argument.
[-abc]             any or all arguments within [ ] are optional.
-a|-b              options delimited by | cannot be used together.
argument ...       argument is repeatable.
[expression] ...   entire expression within [ ] is repeatable.


====

BEGIN TRANSACTION;
-- Tear down the existing alert objects so they can be recreated below.
-- WARNING: this destroys all rows in both 'alerts' and 'history.alerts'.
-- CASCADE also drops objects that depend on the function (e.g. a trigger that calls it).
DROP FUNCTION history_alerts() CASCADE;
DROP TABLE history.alerts;
DROP TABLE alerts;

-- Live table of alerts, one row per alert, keyed by 'alert_uuid'.
CREATE TABLE alerts (
    alert_uuid             uuid                        not null    primary key,
    alert_host_uuid        uuid                        not null,                    -- The host (node or dashboard) that this alert came from; references hosts(host_uuid).
    alert_set_by           text                        not null,                    -- NOTE(review): presumably the name of the program/agent that registered the alert - confirm.
    alert_level            integer                     not null,                    -- 1 (critical), 2 (warning), 3 (notice) or 4 (info)
    alert_title            text                        not null,                    -- ScanCore will read in the agent's <name>.xml words file and look for this key
    alert_message          text                        not null,                    -- ScanCore will read in the agent's <name>.xml words file and look for this message key
    alert_sort_position    integer                     not null    default 9999,    -- The alerts will sort on this column. It allows for an optional sorting of the messages in the alert.
    alert_show_header      integer                     not null    default 1,       -- This can be set to have the alert be printed with only the contents of the string, no headers.
    modified_date          timestamp with time zone    not null,

    FOREIGN KEY(alert_host_uuid) REFERENCES hosts(host_uuid)
);
ALTER TABLE alerts OWNER TO admin;

-- History copy of 'alerts'. It carries the same data columns, without the primary key, foreign key,
-- defaults or NOT NULL constraints (except on 'modified_date'), so many rows may share an 'alert_uuid'.
CREATE TABLE history.alerts (
    history_id             bigserial,                                               -- Auto-incrementing ID for the history row itself.
    alert_uuid             uuid,
    alert_host_uuid        uuid,
    alert_set_by           text,
    alert_level            integer,
    alert_title            text,
    alert_message          text,
    alert_sort_position    integer,
    alert_show_header      integer,
    modified_date          timestamp with time zone    not null
);
ALTER TABLE history.alerts OWNER TO admin;

-- Trigger function: copies the 'alerts' row that was just inserted or updated into 'history.alerts'.
--
-- The column list must match 'history.alerts' exactly, with one value per column. An earlier revision
-- listed 'alert_title_variables' and 'alert_message_variables', which do not exist in either table and
-- had no matching VALUES entries, so every write to 'alerts' would have failed inside this function.
CREATE FUNCTION history_alerts() RETURNS trigger
AS $$
DECLARE
    history_alerts RECORD;
BEGIN
    -- Re-read the stored row by its UUID so the history entry holds the values as saved in 'alerts'.
    SELECT INTO history_alerts * FROM alerts WHERE alert_uuid = new.alert_uuid;
    INSERT INTO history.alerts
        (alert_uuid,
         alert_host_uuid,
         alert_set_by,
         alert_level,
         alert_title,
         alert_message,
         alert_sort_position,
         alert_show_header,
         modified_date)
    VALUES
        (history_alerts.alert_uuid,
         history_alerts.alert_host_uuid,
         history_alerts.alert_set_by,
         history_alerts.alert_level,
         history_alerts.alert_title,
         history_alerts.alert_message,
         history_alerts.alert_sort_position,
         history_alerts.alert_show_header,
         history_alerts.modified_date);
    -- The return value of a row-level AFTER trigger is ignored, so NULL is fine here.
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;
ALTER FUNCTION history_alerts() OWNER TO admin;

-- After every INSERT or UPDATE on 'alerts', call history_alerts() once per affected row.
CREATE TRIGGER trigger_alerts
    AFTER INSERT OR UPDATE ON alerts
    FOR EACH ROW EXECUTE PROCEDURE history_alerts();

COMMIT;

====

3rd party stuff;

dnf install autoconf automake bzip2-devel corosynclib-devel gnutls-devel help2man libqb-devel libtool \
            libtool-ltdl-devel libuuid-devel libxml2-devel libxslt-devel pam-devel "pkgconfig(dbus-1)" \
	    "pkgconfig(glib-2.0)" python3-devel asciidoc inkscape publican booth-site diffstat \
	    fence-agents-apc fence-agents-ipmilan fence-agents-scsi fence-virt python3-lxml ruby-devel \
	    rubygem-backports rubygem-ethon rubygem-ffi rubygem-multi_json rubygem-open4 rubygem-rack \
	    rubygem-rack-protection rubygem-rack-test rubygem-sinatra rubygem-test-unit flex perl-generators
----
pacemaker;
git tag
git checkout <2.x>
make srpm
mv /home/digimer/anvil/builds/pacemaker/pacemaker-2.0.0-0.1.rc2.fedora.src.rpm ~/rpmbuild/SRPMS/
----
pcs;
- Requires pacemaker 2.x install
git tag
git checkout <0.10.x>

====

Building 3rd party tools
dnf -y install bzip2-devel corosynclib-devel docbook-style-xsl gnutls-devel help2man libqb-devel libtool libtool-ltdl-devel libuuid-devel libxml2-devel libxslt-devel ncurses-devel pam-devel

==========

Build Dependency chain;

perl-Log-Log4perl
  perl-Log-Dispatch-FileRotate
perl-MIME-Lite

===========================================================================================================

RPM build order

--- Install from RHEL repos;

- Groups;
dnf group install development rpm-development-tools

- Uninstall
dnf remove biosdevname

- Packages
dnf -y install bash-completion bind-utils dnf-utils firefox gdm libgcrypt-devel libutempter-devel libvirt-bash-completion ncurses-devel openssl-devel pam-devel perl-Capture-Tiny perl-Devel-CheckLib perl-Digest-SHA1 perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-HTML-Parser \
            perl-IO-stringy perl-MailTools perl-Module-Build perl-Module-Install perl-Module-Install-ReadmeFromPod perl-Net-DNS perl-Test-Exception perl-Test-Simple perl-Test-Pod perl-Test-Pod-Coverage \
            perl-Test2-Suite rpm-build systemd-devel texinfo virt-manager wget \
            perl-Params-ValidationCompiler perl-Dist-CheckConflicts perl-namespace-autoclean perl-Test-Fatal perl-Devel-GlobalDestruction perl-IPC-Run3 perl-Specio perl-Sys-Syslog postfix perl-DBI perl-IPC-SysV perl-Test perl-Filter rrdtool-perl perl-Test-Warn perl-Date-Manip \
            perl-MIME-Types python36 docbook-style-xsl libxslt flex kernel-devel perl-IPC-System-Simple xdg-user-dirs perl-Import-Into perl-Class-XSAccessor perl-Sub-Name perl-DynaLoader-Functions perl-Devel-CallChecker perl-Test-Requires \
            apr-devel apr-util-devel gdbm-devel httpd-devel perl-CGI perl-libwww-perl perl-Path-Tiny perl-ExtUtils-Embed perl-XML-DOM perl-Test-Taint perl-Readonly perl-Devel-Peek perl-String-ShellQuote elfutils-libelf-devel po4a

--- First round of builds;
alteeve-el8-repo
anvil
anvil-striker-extra
htop
libssh2
perl-Algorithm-C3
perl-B-Compiling
perl-BSD-Resource
perl-Devel-ArgNames
perl-Devel-Refcount
perl-Email-Date-Format
perl-Exporter-Lite
perl-ExtUtils-Config
perl-ExtUtils-Depends
perl-ExtUtils-Helpers
perl-Eval-WithLexicals
perl-Fennec-Lite
perl-HTML-Strip
perl-File-BaseDir
perl-File-MimeInfo
perl-IO-CaptureOutput
perl-Lexical-SealRequireHints
perl-Linux-Pid
perl-Log-Journald
perl-Net-Domain-TLD
perl-Mail-Sender
perl-Mail-Sendmail
perl-Module-Install-GithubMeta
perl-Net-Domain-TLD
perl-Net-OpenSSH	           # Requires the builder enter their password
perl-Proc-Simple
perl-Sub-Quote
perl-Test-Identity
perl-Test-Needs
perl-Test-UseAllModules
perl-UUID-Tiny

--- Second round of builds;

# RHEL 8 blocks the install of libssh2-devel via dnf, so use rpm to install it
rpm -Uvh /var/www/html/repo/RPMS/x86_64/libssh2-1.8.0-8.el8.x86_64.rpm /var/www/html/repo/RPMS/x86_64/libssh2-devel-1.8.0-8.el8.x86_64.rpm
dnf -y install perl-IO-CaptureOutput perl-Exporter-Lite perl-Module-Install-GithubMeta perl-Net-Domain-TLD perl-Test-UseAllModules perl-Mail-Sender perl-Mail-Sendmail perl-Test-Needs perl-Email-Date-Format perl-ExtUtils-Depends perl-B-Compiling perl-Lexical-SealRequireHints \
               perl-Sub-Quote perl-BSD-Resource perl-Linux-Pid perl-Algorithm-C3 perl-Fennec-Lite perl-Devel-ArgNames perl-Devel-Refcount perl-ExtUtils-Config perl-ExtUtils-Helpers perl-Test-Identity perl-Eval-WithLexicals

mod_perl			# NOTE: When --sign'ing it, it throws an error. Build without '--sign' and then do 'rpm --addsign <all rpms>'
perl-B-Hooks-OP-Check
perl-Class-C3
perl-Data-Dumper-Concise
perl-Email-Valid
perl-ExtUtils-InstallPaths
perl-Lexical-Var
perl-Meta-Builder
perl-MIME-Lite
perl-Module-Install-AutoLicense
perl-Net-Netmask
perl-Test-Refcount

--- Third round of builds;

dnf -y install perl-Email-Valid perl-Module-Install-AutoLicense perl-MIME-Lite perl-B-Hooks-OP-Check perl-Lexical-Var mod_perl perl-Class-C3 perl-Data-Dumper-Concise perl-ExtUtils-InstallPaths perl-Meta-Builder perl-Test-Refcount

perl-bareword-filehandles
perl-Email-Find
perl-Future
perl-Log-Dispatch
perl-Module-Build-Tiny
perl-Module-Install-CheckLib
perl-multidimensional
### NOTE: These two need to be built using bootstrap, and will be rebuilt later.
perl-Devel-Declare            # NOTE: rpmbuild -ba --define='perl_bootstrap=1' perl-Devel-Declare.spec, build perl-Devel-CallParser, rebuild this without bootstrap.
perl-indirect                 # NOTE: rpmbuild -ba --define='perl_bootstrap=1' perl-indirect.spec, build perl-Devel-CallParser, rebuild this without bootstrap.


--- Fourth round of builds;

dnf -y install perl-Email-Find perl-Module-Install-CheckLib perl-Log-Dispatch perl-Devel-Declare perl-bareword-filehandles perl-multidimensional perl-indirect perl-Future perl-Module-Build-Tiny

perl-aliased
perl-Devel-CallParser
perl-HTML-FromText
perl-Log-Dispatch-FileRotate
perl-Net-SSH2
perl-strictures

--- Fifth round of builds;

dnf -y install perl-Log-Dispatch perl-strictures perl-Devel-CallParser perl-Log-Dispatch-FileRotate perl-aliased

perl-Exporter-Declare
perl-Log-Log4perl
perl-Moo
### NOTE: We're rebuilding these two, this time without bootstrap
perl-Devel-Declare
perl-indirect


--- Sixth round of builds;

### NOTE: expire-cache isn't enough to clear the boot-strapped versions
dnf clean all
dnf reinstall perl-Devel-Declare perl-indirect
dnf -y install perl-Moo perl-Log-Log4perl perl-Exporter-Declare

perl-CPAN-Changes
perl-Log-Contextual


--- Seventh round of builds;

dnf -y install perl-CPAN-Changes perl-Log-Contextual

perl-File-DesktopEntry
perl-Object-Remote

--- Seventh round, final installs.

dnf -y install perl-File-DesktopEntry perl-Object-Remote perl-Net-OpenSSH


=======================================

cd SOURCES;
tar -xzvf $tarball (or xjvf, whatever);
tar -xzv -C package-digimer -f $tarball;
patch in *-digimer;
diff -uNr $package $package-digimer > ../SOURCES/whatever.patch

00:24 < Bahhumbug>
Burst the source tarball;
do it again but stash this copy in a *-digimer directory.
Do patching in the -digimer directory and when finished create a recursive unified diff and place the results directly in the named .patch file.


cd ~/rpmbuild/SOURCES
tar -xzvf htop-2.2.0.tar.gz
mv htop-2.2.0 htop-2.2.0-digimer
cd htop-2.2.0-digimer
# patch
diff -uNr htop-2.2.0 htop-2.2.0-digimer > whatever.patch

# Fabio's way
diff -Naurd htop-2.2.0 htop-2.2.0-digimer > htop_python3_MakeHeader.patch

=======================================

chrissie's cluster script

# Run these commands on all nodes:
cp ../ifup-local /sbin
/sbin/ifup-local
pcs host auth -uhacluster -phacluster amy.chrissie.net anna.chrissie.net clara.chrissie.net fanny.chrissie.net

if [ "$(hostname)" != "amy.chrissie.net" -a "$(hostname)" != "amy" ]
then
  exit
fi

# and these on just one:
pcs cluster setup taroxVMs amy.chrissie.net anna.chrissie.net clara.chrissie.net fanny.chrissie.net
pcs cluster start --all
sleep 30

pcs stonith create fence-virsh fence_virsh ipaddr=192.168.100.1 login=root passwd=christine pcmk_host_map="amy:rhel8-1;anna:rhel8-2;clara:rhel8-3;fanny:rhel8-4"


if [ ! -e '/dev/an-a01n01_vg0/srv09-psql_0' ];
then
    /sbin/lvcreate -L 69GiB -n srv09-psql_0 an-a01n01_vg0
fi
virt-install --connect qemu:///system \
  --name srv09-psql \
  --ram 4096 \
  --arch x86_64 \
  --vcpus 2 \
  --cpu Nehalem,+fsgsbase \
  --cdrom '/shared/files/Win2016_Server_64-bit_English.iso' \
  --boot menu=on \
  --disk path='/shared/files/virtio-win.iso',device=cdrom --force\
  --os-variant win2k8 \
  --network bridge=ifn_bridge1,model=virtio \
  --disk path=/dev/an-a01n01_vg0/srv09-psql_0,bus=virtio,cache=writethrough \
  --graphics spice \
  --noautoconsole --wait -1 > /var/log/anvil-server_srv09-psql.log &


# Migration;


pcs constraint remove $(pcs constraint show --full | grep ban-srv07-el6 | perl -pe 's/^.*?id:(.*?)\)/$1/')


DRBD 9 - Check;
/sys/kernel/debug/drbd/resources/${resource_name}/connections/${hostname}/0/proc_drbd

====== New style
<?xml version="1.0" encoding="UTF-8"?>
<!--
Generated on:    2019-06-20, 15:32:27
Striker Version: 2.0.7
-->

<anvil name="xx-anvil-01">
	<machines>
		<node name="xx-a01n01.digimer.ca" uuid="xxx">
			<network name="bcn1" ntp=""  ethtool_opts="" mtu="1500" default_gateway="0" >
				<!-- subnet can be in "/xx" format -->
				<address ip="10.201.10.1" subnet="255.255.0.0" gateway="" default_gateway="0" dns="" />
				<interface name="bcn1_link1" mac="xx:xx:xx:xx:xx:xx"/>
				<interface name="bcn1_link2" mac="xx:xx:xx:xx:xx:yy"/>
			</network>
			<fence>
				<!-- IPMI data comes from hosts -> host_ipmi. If it is found, it always is used as the first fence device -->
				<!-- PDU shows how to reference devices -->
				<method name="pdu" type="pdu" order="1">
					<!-- The 'name' parameter has to match an entry under devices -> pdu's name -->
					<device name="xx-pdu01" port="1" />
					<device name="xx-pdu02" port="2" />
				</method>
				<!-- This would only happen on its own, but is here for example. The 'server_name' is the name of the VM on the host -->
				<method name="kvm" type="kvm" order="1">
					<device name="host1" server_name="xx-a01n01" />
				</method>
			</fence>
			<power>
				<!-- The 'name' parameter has to match an entry under devices -> ups's name -->
				<ups name="xx-ups01" />
				<ups name="xx-ups02" />
			</power>
		</node>
		<dr name="xx-a01dr01.digimer.ca" uuid="xxx">
			<!-- IPMI is used to power on/off for scheduled, periodic resyncs. -->
		</dr>
	</machines>
	<!-- These devices need to reference entries in the 'fences' database table. -->
	<fences>
		<!-- When a machine references these, the 'type="x"' references the child element and the contained 'name="x"' references the child's child element by name -->
		<pdu>
			<pdu name="xx-pdu01" agent="fence_apc_snmp" address="10.20.2.1" />
			<pdu name="xx-pdu02" agent="fence_apc_snmp" address="10.20.2.2" />
		</pdu>
		<!-- UPSes are used so that we know which UPSes feed a given node, when deciding power event actions -->
		<ups>
			<ups name="xx-ups01" address="10.20.3.1" />
			<ups name="xx-ups02" address="10.20.3.2" />
		</ups>
		<!-- In cases where VMs are being used. Later we can add support for VMware -->
		<kvm>
			<kvm name="host1" address="192.168.122.1" user="root" password="xxx" />
		</kvm>
	</fences>
</anvil>

====== Old manifest style
<?xml version="1.0" encoding="UTF-8"?>

<!--
Generated on:    2019-06-20, 15:32:27
Striker Version: 2.0.7
-->

<config>
	<node name="mk-a01n01.digimer.ca" uuid="71822143-2a4d-43b4-839b-7c66b3c2e4d7">
		<network>
			<bcn ip="10.20.10.1" />
			<sn ip="10.10.10.1" />
			<ifn ip="10.255.10.1" />
		</network>
		<ipmi>
			<on reference="ipmi_n01" ip="10.20.11.1" netmask="255.255.0.0" user="admin" password="Initial1" gateway="" lanplus="false" privlvl="USER" />
		</ipmi>
		<pdu>
			<on reference="pdu01" port="1" />
			<on reference="pdu02" port="1" />
			<on reference="pdu03" port="" />
			<on reference="pdu04" port="" />
		</pdu>
		<kvm>
			<!-- port == virsh name of VM -->
			<on reference="kvm_host" port="" />
		</kvm>
		<interfaces>
			<interface name="bcn_link1" mac="f8:0f:41:f8:6b:fe" />
			<interface name="bcn_link2" mac="00:19:99:ff:ba:b4" />
			<interface name="sn_link1" mac="f8:0f:41:f8:6b:ff" />
			<interface name="sn_link2" mac="00:19:99:ff:8b:5a" />
			<interface name="ifn_link1" mac="00:19:99:ff:ba:b5" />
			<interface name="ifn_link2" mac="00:19:99:ff:8b:59" />
		</interfaces>
	</node>
	<node name="mk-a01n02.digimer.ca" uuid="f7a7b2be-a10a-40f0-991d-2265e3ec3cce">
		<network>
			<bcn ip="10.20.10.2" />
			<sn ip="10.10.10.2" />
			<ifn ip="10.255.10.2" />
		</network>
		<ipmi>
			<on reference="ipmi_n02" ip="10.20.11.2" netmask="255.255.0.0" user="admin" password="Initial1" gateway="" lanplus="false" privlvl="USER" />
		</ipmi>
		<pdu>
			<on reference="pdu01" port="2" />
			<on reference="pdu02" port="2" />
			<on reference="pdu03" port="" />
			<on reference="pdu04" port="" />
		</pdu>
		<kvm>
			<on reference="kvm_host" port="" />
		</kvm>
		<interfaces>
			<interface name="bcn_link1" mac="00:26:2d:0c:a8:74" />
			<interface name="bcn_link2" mac="00:19:99:ff:bb:4e" />
			<interface name="sn_link1" mac="00:26:2d:0c:a8:75" />
			<interface name="sn_link2" mac="00:19:99:ff:bb:8b" />
			<interface name="ifn_link1" mac="00:19:99:ff:bb:4f" />
			<interface name="ifn_link2" mac="00:19:99:ff:bb:8a" />
		</interfaces>
	</node>
	<common>
		<networks>
			<bcn netblock="10.20.0.0" netmask="255.255.0.0" gateway="" defroute="no" ethtool_opts="" />
			<sn netblock="10.10.0.0" netmask="255.255.0.0" gateway="" defroute="no" ethtool_opts="" />
			<ifn netblock="10.255.0.0" netmask="255.255.0.0" gateway="10.255.255.254" dns1="8.8.8.8" dns2="8.8.4.4" ntp1="" ntp2="" defroute="yes" ethtool_opts="" />
			<bonding opts="mode=1 miimon=100 use_carrier=1 updelay=120000 downdelay=0">
				<bcn name="bcn_bond1" primary="bcn_link1" secondary="bcn_link2" />
				<sn name="sn_bond1" primary="sn_link1" secondary="sn_link2" />
				<ifn name="ifn_bond1" primary="ifn_link1" secondary="ifn_link2" />
			</bonding>
			<bridges>
				<bridge name="ifn_bridge1" on="ifn" />
			</bridges>
			<mtu size="1500" />
		</networks>
		<repository urls="" />
		<media_library size="40" units="GiB" />
		<storage_pool_1 size="100" units="%" />
		<anvil prefix="mk" sequence="01" domain="digimer.ca" password="Initial1" striker_user="" striker_database="" />
		<ssh keysize="8191" />
		<cluster name="mk-anvil-01">
			<!-- Set the order to 'kvm' if building on KVM-backed VMs. Also set each node's 'port=' above and '<kvm>' element attributes below. -->
			<fence order="ipmi,pdu" post_join_delay="90" delay="15" delay_node="mk-a01n01.digimer.ca" />
		</cluster>
		<drbd>
			<disk disk-barrier="no" disk-flushes="no" md-flushes="no" c-plan-ahead="1" c-max-rate="110M" c-min-rate="30M" c-fill-target="1M" />
			<options cpu-mask="" />
			<net max-buffers="8192" sndbuf-size="" rcvbuf-size="" />
		</drbd>
		<switch>
			<switch name="mk-switch01.digimer.ca" ip="10.20.1.1" />
			<switch name="mk-switch02.digimer.ca" ip="10.20.1.2" />
		</switch>
		<ups>
			<ups name="mk-ups01.digimer.ca" type="apc" port="3551" ip="10.20.3.1" />
			<ups name="mk-ups02.digimer.ca" type="apc" port="3552" ip="10.20.3.2" />
		</ups>
		<pdu>
			<pdu reference="pdu01" name="mk-pdu01.digimer.ca" ip="10.20.2.1" agent="fence_apc_alteeve" />
			<pdu reference="pdu02" name="mk-pdu02.digimer.ca" ip="10.20.2.2" agent="fence_apc_alteeve" />
		</pdu>
		<ipmi>
			<ipmi reference="ipmi_n01" agent="fence_ipmilan" />
			<ipmi reference="ipmi_n02" agent="fence_ipmilan" />
		</ipmi>
		<kvm>
			<kvm reference="kvm_host" ip="192.168.122.1" user="root" password="" password_script="" agent="fence_virsh" />
		</kvm>
		<striker>
			<striker name="mk-striker01.digimer.ca" bcn_ip="10.20.4.1" ifn_ip="10.255.4.1" database="" user="" password="" uuid="" />
			<striker name="mk-striker02.digimer.ca" bcn_ip="10.20.4.2" ifn_ip="10.255.4.2" database="" user="" password="" uuid="" />
		</striker>
		<update os="true" />
		<iptables>
			<vnc ports="100" />
		</iptables>
		<servers>
			<!-- This isn't used anymore, but this section may be useful for other things in the future. -->
			<!-- <provision use_spice_graphics="0" /> -->
		</servers>
		<tools>
			<use anvil-safe-start="true" anvil-kick-apc-ups="false" />
		</tools>
	</common>
</config>


# Attach a network interface:
virsh attach-interface win2019_test bridge ifn_bridge1 --live --model virtio

# Detach a network interface:
virsh detach-interface win2019_test bridge --mac 52:54:00:ee:b5:1d

# Change the MTU of a device;
ip link set <dev> mtu 9000

# Change the MTU of an interface in Windows (not yet tested - http://networking.nitecruzr.net/2007/11/setting-mtu-in-windows-vista.html)
netsh interface ipv4 set subinterface "Local Area Connection" mtu=nnnn store=persistent

yum install kernel-2.6.32-754.27.1.el6.x86_64 kernel-devel-2.6.32-754.27.1.el6.x86_64 kernel-headers-2.6.32-754.27.1.el6.x86_64