You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
775 lines
34 KiB
775 lines
34 KiB
Firewall config stuff. |
|
==== |
|
---- Files |
|
[root@f28-striker01 zones]# cat /etc/firewalld/zones/BCN1.xml |
|
<?xml version="1.0" encoding="utf-8"?> |
|
<zone> |
|
<short>BCN1</short> |
|
<description>Back-Channel Network #1 - Used for all inter-machine communication in the Anvil!, as well as communication for foundation pack devices. Should be VLAN-isolated from the IFN and, thus, trusted.</description> |
|
<service name="ssh"/> |
|
<service name="dhcpv6-client"/> |
|
<service name="cockpit"/> |
|
<service name="postgresql"/> |
|
<service name="http"/> |
|
<service name="https"/> |
|
<port port="80" protocol="tcp"/> |
|
<port port="443" protocol="tcp"/> |
|
</zone> |
|
|
|
[root@f28-striker01 zones]# cat /etc/firewalld/zones/IFN1.xml |
|
<?xml version="1.0" encoding="utf-8"?> |
|
<zone> |
|
<short>IFN1</short> |
|
<description>Internet/Intranet-Facing Network #1 - Used for all client/user facing traffic. Likely connected to a semi-trusted network only.</description> |
|
<service name="ssh"/> |
|
<service name="postgresql"/> |
|
<service name="http"/> |
|
<service name="https"/> |
|
<port port="80" protocol="tcp"/> |
|
<port port="443" protocol="tcp"/> |
|
</zone> |
|
|
|
[root@f28-striker01 zones]# cat /etc/firewalld/zones/SN1.xml |
|
<?xml version="1.0" encoding="utf-8"?> |
|
<zone> |
|
<short>SN1</short> |
|
<description>Storage Network #1 - Used for DRBD communication between nodes and DR hosts. Should be VLAN-isolated from the IFN and, thus, trusted.</description> |
|
<service name="ssh"/> |
|
</zone> |
|
---- |
|
Reload; |
|
firewall-cmd --reload |
|
|
|
### These are permanent |
|
# Put the interfaces under the appropriate zones. |
|
firewall-cmd --zone=IFN1 --change-interface=ifn1_bond1 |
|
firewall-cmd --zone=BCN1 --change-interface=bcn1_bond1 |
|
firewall-cmd --set-default-zone=IFN1 |
|
|
|
### These are temporary unless --permanent is used |
|
# Allow routing/masq'ing through the IFN |
|
firewall-cmd --zone=IFN1 --add-masquerade |
|
|
|
# Check |
|
firewall-cmd --zone=IFN1 --query-masquerade |
|
yes |
|
|
|
# Disable |
|
# NOTE: Doesn't break existing connections |
|
firewall-cmd --zone=IFN1 --remove-masquerade |
|
|
|
|
|
|
|
- Notes; |
|
firewall-cmd --state [running (rc: 0),not running (rc:252)] |
|
|
|
- Paths |
|
If we want to create services or helpers later, look under - /usr/lib/firewalld/ |
|
Core firewalld configs, including default zones, etc - /etc/firewalld/ |
|
|
|
- https://www.digitalocean.com/community/tutorials/how-to-set-up-a-firewall-using-firewalld-on-centos-7 |
|
|
|
* Zones are meant to deal with dynamic environments and aren't that useful in mostly static server environments |
|
** Seem to be pre-configured sets of what is/isn't allowed. 'public' for IFN, 'work' for SN/BCN? 'external/internal' are for routing |
|
** Configured in /etc/firewalld/zones/<zone>.xml - Create 'BCN', 'SN' and 'IFN'? |
|
* Use 'firewall-cmd' WITHOUT '--permanent' for things like enabling the VNC port for a server. Use '--permanent' for everything else. |
|
|
|
==== |
|
|
|
|
|
Striker as PXE server |
|
==== |
|
|
|
dnf install dhcp tftp-server syslinux kernel-core |
|
---- |
|
cat /etc/dhcp/dhcpd.conf |
|
### Global options |
|
option domain-name "alteeve.com"; # domain name |
|
option domain-name-servers 8.8.8.8, 8.8.4.4; # |
|
authoritative; |
|
ddns-update-style none; |
|
|
|
subnet 10.1.0.0 netmask 255.255.0.0 { |
|
default-lease-time 600; |
|
max-lease-time 7200; |
|
range 10.1.14.1 10.1.14.254; |
|
option routers 10.1.4.1; |
|
} |
|
---- |
|
|
|
# NOTE: We DON'T enable DHCP. We'll turn it on as needed. |
|
|
|
# NOTE: Apache needs to show dot-files! (anaconda looks for .treeinfo) |
|
==== |
|
[root@f28-striker01 conf.d]# pwd |
|
/etc/httpd/conf.d |
|
[root@f28-striker01 conf.d]# diff -u autoindex.conf.original autoindex.conf |
|
--- autoindex.conf.original 2018-10-13 03:37:00.084687783 -0400 |
|
+++ autoindex.conf 2018-10-13 03:37:41.479635314 -0400 |
|
@@ -89,5 +89,6 @@ |
|
# IndexIgnore is a set of filenames which directory indexing should ignore |
|
# and not include in the listing. Shell-style wildcarding is permitted. |
|
# |
|
-IndexIgnore .??* *~ *# HEADER* README* RCS CVS *,v *,t |
|
+#IndexIgnore .??* *~ *# HEADER* README* RCS CVS *,v *,t |
|
+IndexIgnore *~ *# HEADER* README* RCS CVS *,v *,t |
|
==== |
|
|
|
systemctl start tftp.socket |
|
systemctl enable tftp.socket |
|
|
|
# Bootloader for BIOS |
|
OS="fedora28" |
|
mkdir /var/lib/tftpboot/ |
|
cp $(anvil source)/pxe/tftpboot/bios/* /var/lib/tftpboot/ |
|
|
|
# Bootloader for UEFI |
|
cp $(anvil source)/pxe/tftpboot/uefi/* /var/lib/tftpboot/uefi/ |
|
|
|
# Copy kernel images for tftpboot downloads |
|
mkdir -p /var/lib/tftpboot/${OS} |
|
cp /lib/modules/$(uname -r)/vmlinuz /var/lib/tftpboot/${OS}/ |
|
mkinitrd /var/lib/tftpboot/${OS}/initrd.img $(uname -r) |
|
|
|
# Configs from anvil source |
|
rsync -av pxe/tftpboot/pxelinux.cfg/default root@f28-striker01:/var/lib/tftpboot/pxelinux.cfg/ |
|
rsync -av pxe/tftpboot/pxelinux/uefi root@f28-striker01:/var/lib/tftpboot/pxelinux/ |
|
|
|
==== |
|
|
|
Firewalld Router config |
|
==== |
|
# Allow routing/masq'ing through the IFN |
|
firewall-cmd --zone=IFN --add-masquerade |
|
success |
|
|
|
# Check |
|
firewall-cmd --zone=IFN --query-masquerade |
|
yes |
|
|
|
# Disable |
|
# NOTE: Doesn't break existing connections |
|
firewall-cmd --zone=IFN --remove-masquerade |
|
success |
|
==== |
|
|
|
|
|
DB stuff; |
|
|
|
Dump; |
|
su - postgres -c "pg_dump anvil" > /anvil.out |
|
|
|
Drop; |
|
su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil" |
|
|
|
Reload the DB; |
|
su - postgres -c "dropdb anvil" && su - postgres -c "createdb --owner admin anvil" && su - postgres -c "psql anvil < /anvil.out" |
|
su - postgres -c "psql anvil" |
|
|
|
|
|
|
|
All systems have a UUID, even VMs. Use that for system UUID in the future. |
|
|
|
https://access.redhat.com/solutions/2841131 - How to write a NetworkManager dispatcher script to apply ethtool commands? |
|
|
|
|
|
Setup nodes to log to striker? |
|
https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-configuring_netconsole |
|
|
|
* Pacemaker can be monitored via SNMP; https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/high_availability_add-on_reference/s1-snmpandpacemaker-HAAR |
|
* corosync.conf; https://access.redhat.com/articles/3185291 |
|
|
|
Changes made using tools such as nmcli do not require a reload but do require the associated interface to be put down and then up again. That can be done by using commands in the following format: |
|
* nmcli dev disconnect interface-name |
|
Followed by: |
|
* nmcli con up interface-name |
|
|
|
NOTE: RHEL doesn't support direct-cabled bonds - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/ch-configure_network_bonding |
|
|
|
ifcfg-X config Notes - /usr/share/doc/initscripts-*/sysconfig.txt (Look for the sections describing files /etc/sysconfig/network and /etc/sysconfig/network-scripts/ifcfg-<interface-name>); |
|
- man 5 nm-settings-ifcfg-rh |
|
- https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-Using_Channel_Bonding#s3-modules-bonding-directives |
|
- /usr/share/doc/kernel-doc-*/Documentation/networking/bonding.txt |
|
iface |
|
* PREFIXx overrules NETMASKx. Use PREFIXx, not NETMASKx. |
|
* The 'x' suffix for PREFIX, NETMASK, etc. starts at 0 and must count up by 1 at a time. |
|
* ZONE will be useful for the firewall stuff later. |
|
* ETHTOOL_OPTS is deprecated, replaced by using udev rules |
|
* initscripts interpret PEERDNS=no to mean "never touch resolv.conf". NetworkManager interprets it to say "never add automatic (DHCP, PPP, VPN, etc.) nameservers to resolv.conf". |
|
Bond |
|
* resend_igmp & num_unsol_na={1~255} may help if a switch is slow to notice traffic has moved to the new interface. Default is 1. Each update is sent 200ms apart. |
|
* Bridged interfaces should use BRIDGE_UUID="", _not_ BRIDGE="". The former causes the latter to be ignored and the latter is only used for possible compatibility reasons. |
|
Bridge |
|
* STP=no is default, we'll test 'yes'. |
|
* DOMAIN="<client_domain>" |
|
|
|
# Network Setup |
|
hostnamectl set-hostname m3-a02n01.alteeve.com --static |
|
hostnamectl set-hostname --pretty "Alteeve's Niche! - Anvil! 02, Node 01" |
|
hostname m3-a02n01.alteeve.com |
|
ssh-keygen -t rsa -N "" -b 8191 -f ~/.ssh/id_rsa |
|
|
|
hosts: |
|
==== |
|
10.1.20.1 m3-a02n01.bcn m3-a02n01 m3-a02n01.alteeve.com |
|
10.41.20.1 m3-a02n01.sn |
|
10.255.20.1 m3-a02n01.ifn |
|
|
|
10.1.20.2 m3-a02n02.bcn m3-a02n02 m3-a02n02.alteeve.com |
|
10.41.20.2 m3-a02n02.sn |
|
10.255.20.2 m3-a02n02.ifn |
|
|
|
10.1.20.3 m3-a02dr01.bcn m3-a02dr01 m3-a02dr01.alteeve.com |
|
10.41.20.3 m3-a02dr01.sn |
|
10.255.20.3 m3-a02dr01.ifn |
|
==== |
|
|
|
Example Link config: |
|
==== |
|
==== |
|
|
|
Example Bonding config: |
|
==== |
|
BRIDGE_UUID="e7a8f977-560d-4a94-95cd-a1218f0fe890" |
|
DEVICE="ifn1_bond1" |
|
NAME="IFN 1 - Bond 1" |
|
UUID="a59d138e-6c40-4366-b859-fcadafe577f4" |
|
BONDING_OPTS="mode=active-backup primary=ifn1_link1 updelay=120000 downdelay=0 miimon=100 primary_reselect=better" |
|
TYPE="Bond" |
|
BONDING_MASTER="yes" |
|
BOOTPROTO="none" |
|
IPV6INIT="no" |
|
ONBOOT="yes" |
|
DEFROUTE="no" |
|
BRIDGE="ifn1_bridge1" |
|
ZONE=public |
|
==== |
|
|
|
Example Bridge config: |
|
===== |
|
===== |
|
|
|
======= |
|
virt-manager stores information in dconf-editor -> /org/virt-manager/virt-manager/connections ($HOME/.config/dconf/user) |
|
|
|
==== dconf read /org/virt-manager/virt-manager/connections/uris |
|
['qemu+ssh://root@localhost/system', 'qemu+ssh://root@wp-a01n02.remote/system', 'qemu+ssh://root@an-nas02.kw01.alteeve.ca/system', 'qemu+ssh://root@hb-a01n01.remote/system', 'qemu+ssh://root@hb-a01n02.remote/system', 'qemu:///system'] |
|
==== dconf read /org/virt-manager/virt-manager/connections/autoconnect |
|
['qemu+ssh://root@localhost/system'] |
|
==== |
|
|
|
|
|
### Setup - Striker |
|
|
|
# Packages |
|
depends on: perl-XML-Simple postgresql-server postgresql-plperl postgresql-contrib perl-CGI perl-NetAddr-IP perl-DBD-Pg rsync perl-Log-Journald perl-Net-SSH2 |
|
|
|
# Paths |
|
mkdir /usr/sbin/anvil |
|
|
|
# virsh |
|
virsh net-destroy default |
|
virsh net-autostart default --disable |
|
virsh net-undefine default |
|
|
|
# Web - TODO: Setup to auto-use "Let's Encrypt", but make sure we have an offline fall-back |
|
systemctl enable httpd.service |
|
systemctl start httpd.service |
|
|
|
# Post install |
|
systemctl daemon-reload |
|
|
|
# Firewall |
|
firewall-cmd --permanent --add-service=http |
|
firewall-cmd --permanent --add-service=postgresql |
|
firewall-cmd --reload |
|
|
|
# SELinux |
|
restorecon -rv /var/www |
|
|
|
============================================================= |
|
[root@striker-m3 ~]# cat watch_logs |
|
clear; journalctl -f -a -S "$(date +"%F %R:%S")" -t anvil |
|
|
|
|
|
### Setup - Nodes |
|
|
|
# OS Install |
|
* Set TZ to Etc/GMT |
|
* Disable kdump |
|
* Storage; |
|
** 1 = /BIOS Boot (1 MiB) |
|
** 2 = /boot (1 GiB) |
|
** 3 = LVM PV (all remaining space) |
|
*** VG = <short-name>_vg0 |
|
**** <swap> (8 GiB) |
|
**** / (50 GiB) |
|
**** /mnt/anvil (20 GiB) |
|
* 'root' and 'admin' use 'Initial1' (with sudo) |
|
|
|
# OS config |
|
* Register if RHEL proper; |
|
subscription-manager register --username <user> --password <secret> --auto-attach --force |
|
subscription-manager repos --enable=rhel-ha-for-rhel-7-server-rpms |
|
subscription-manager repos --enable=rhel-7-server-optional-rpms |
|
* Packages to install; |
|
|
|
*** DASHBOARDS |
|
rpm -Uvh https://www.alteeve.com/an-repo/el7/alteeve-el7-repo-0.1-1.noarch.rpm |
|
yum install perl-CGI perl-DBD-Pg perl-DBI perl-Log-Journald perl-Net-SSH2 perl-NetAddr-IP perl-XML-Simple postgresql-contrib postgresql-plperl postgresql-server rsync |
|
|
|
*** NODES |
|
rpm -Uvh https://www.alteeve.com/an-repo/el7/alteeve-el7-repo-0.1-1.noarch.rpm |
|
yum install bash-completion bind-utils bridge-utils drbd drbd-bash-completion drbd-kernel drbd-utils fence-agents-all fence-agents-virsh gpm kernel-doc kmod-drbd libvirt libvirt-daemon libvirt-daemon-driver-qemu libvirt-daemon-kvm libvirt-docs mlocate pacemaker pcs perl-Data-Dumper perl-JSON perl-XML-Simple qemu-kvm qemu-kvm-common qemu-kvm-tools rsync screen vim virt-install |
|
|
|
* Packages to remove; |
|
yum remove biosdevname |
|
|
|
* Service management; |
|
systemctl start gpm.service |
|
|
|
* Network; |
|
** {bc,if,s}nX_{link,bond,bridge}Y naming |
|
** firewall; - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/high_availability_add-on_reference/s1-firewalls-haar |
|
firewall-cmd --permanent --add-service=high-availability |
|
firewall-cmd --add-service=high-availability |
|
firewall-cmd --reload |
|
|
|
* Cluster Config; |
|
==== Both nodes |
|
echo Initial1 | passwd hacluster --stdin |
|
systemctl start pcsd.service |
|
systemctl enable pcsd.service |
|
systemctl disable libvirtd.service |
|
systemctl stop libvirtd.service |
|
==== One node |
|
pcs cluster auth m3-a01n01 m3-a01n02 |
|
# Username: hacluster |
|
# Password: |
|
|
|
pcs cluster setup --name m3-anvil-01 m3-a01n01 m3-a01n02 |
|
pcs cluster start --all |
|
pcs stonith create virsh_node1 fence_virsh pcmk_host_list="m3-a01n01" ipaddr="192.168.122.1" passwd="secret" login="root" delay="15" port="m3-a01n01" op monitor interval="60" |
|
pcs stonith create virsh_node2 fence_virsh pcmk_host_list="m3-a01n02" ipaddr="192.168.122.1" passwd="secret" login="root" port="m3-a01n02" op monitor interval="60" |
|
|
|
pcs resource create hypervisor systemd:libvirtd op monitor interval=60 |
|
pcs resource clone hypervisor clone-max=2 notify="false" |
|
|
|
pcs resource create drbd systemd:drbd op monitor interval=60 |
|
pcs resource clone drbd clone-max=2 notify="false" |
|
|
|
stonith_admin --fence m3-a01n02 --verbose; crm_error $? |
|
|
|
==== (configured via https) |
|
|
|
Ports we care about |
|
|
|
Proto Number Used by Nets Description |
|
TCP 2224 pcsd bcn It is crucial to open port 2224 in such a way that pcs from any node can talk to all nodes in the cluster, including itself. |
|
UDP 5404 corosync bcn Required on corosync nodes if corosync is configured for multicast UDP |
|
UDP 5405 corosync bcn Required on all corosync nodes (needed by corosync) |
|
TCP 7788+ drbd sn 1 port per resource |
|
TCP 49152-49215 virsh bcn live migration - migration_port_min and migration_port_max attributes in the /etc/libvirt/qemu.conf |
|
|
|
* After all changes; |
|
firewall-cmd --zone=public --add-port=49152-49215/tcp --permanent |
|
firewall-cmd --reload |
|
|
|
==== DRBD notes |
|
|
|
* Resource names can contain any US-ASCII character, except for spaces |
|
* A resource is a single replication stream for 1 or more volumes, max 65,535 vols per resource |
|
* DRBD does, however, ship with an LVM integration facility that automates the creation of LVM snapshots immediately before synchronization. This ensures that a consistent copy of the data is always available on the peer, even while synchronization is running. See Using automated LVM snapshots during DRBD synchronization for details on using this facility. |
|
** https://docs.linbit.com/docs/users-guide-9.0/#s-lvm-snapshots |
|
* Checksum-based synchronization computes a block's hash on source and target and skips if matching, possibly making resync much faster for blocks rewritten with the same data, but at the cost of CPU. Make this a user-configurable option under the advanced tab. |
|
* Suspended replication allows congested replication links to suspend replication, leaving the peer in a consistent state, but allowing the primary to "pull ahead". When the congestion passes, the delta resyncs. Make this a user-configurable option with scary warnings. |
|
* Online verification can (should?) be run periodically on the server host (verification source will overwrite deltas on the verification target). Perhaps schedule to run once/month? Do resources sequentially as this places a CPU load on the nodes. |
|
* Replication traffic integrity checking uses a given available kernel crypto to verify data integrity on transmission to the peer. If the replicated block can not be verified against the digest, the connection is dropped and immediately re-established; because of the bitmap the typical result is a retransmission. |
|
** Make an option in the advanced tab. Test to see overhead this adds. Choose the lowest overhead algo (within reason) |
|
* Support for disk flushes might be something we want to disable, as it seems to force write-through even with a functioning FBWC/BBU. Need to test. |
|
* Note; "Inconsistent" is almost always useless. "Consistent" and "Outdated" are able to be used safely, just without whatever happened on the peer after. |
|
* Truck based replication, also known as disk shipping, is a means of preseeding a remote site with data to be replicated, by physically shipping storage media to the remote site. |
|
* Make sure that selinux doesn't block DRBD comms over the SN |
|
* See "5.15.1. Growing on-line" for growing a DRBD resource |
|
** Shrinking online is ONLY possible if the metadata is external. Worth creating *_md LVs? Offline requires backing up and restoring the MD |
|
|
|
Provisioning a server will need to: |
|
* Open up a DRBD port (or more, if multiple resources are created). |
|
* Create the DRBD resource(s); Find the lowest free rX.res, create it locally and on the peer (if up), |
|
|
|
firewall-cmd --zone=public --permanent --add-port=7788-7790/tcp |
|
firewall-cmd --reload |
|
|
|
* Provision the server via virt-install |
|
* push the new XML to striker such that the peer's anvil daemon picks it up and writes it out. |
|
|
|
[root@m3-a01n01 drbd.d]# drbdsetup status r0 --verbose --statistics |
|
r0 node-id:1 role:Primary suspended:no |
|
write-ordering:flush |
|
volume:0 minor:0 disk:UpToDate quorum:yes |
|
size:10485404 read:9682852 written:0 al-writes:0 bm-writes:0 upper-pending:0 lower-pending:0 al-suspended:no blocked:no |
|
m3-a01n02.alteeve.com node-id:0 connection:Connected role:Secondary congested:no |
|
volume:0 replication:SyncSource peer-disk:Inconsistent done:92.29 resync-suspended:no |
|
received:0 sent:9679140 out-of-sync:808144 pending:6 unacked:3 |
|
|
|
[root@m3-a01n02 ~]# cat /sys/kernel/debug/drbd/resources/r0/connections/m3-a01n01.alteeve.com/0/proc_drbd |
|
0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----- |
|
ns:24360 nr:10485404 dw:10485404 dr:25420 al:0 bm:0 lo:0 pe:[0;0] ua:0 ap:[0;0] ep:1 wo:2 oos:10461044 |
|
[>....................] sync'ed: 0.3% (10212/10236)M |
|
finish: 0:50:01 speed: 3,480 (5,020 -- 3,480) K/sec |
|
99% sector pos: 20970808/20970808 |
|
resync: used:0/61 hits:557 misses:2 starving:0 locked:0 changed:1 |
|
act_log: used:0/1237 hits:0 misses:0 starving:0 locked:0 changed:0 |
|
blocked on activity log: 0 |
|
|
|
[root@m3-a01n02 ~]# drbdadm primary r0 |
|
r0: State change failed: (-1) Multiple primaries not allowed by config |
|
Command 'drbdsetup primary r0' terminated with exit code 11 |
|
|
|
[root@m3-a01n02 ~]# drbdadm net-options --allow-two-primaries=yes r0 |
|
[root@m3-a01n02 ~]# drbdadm net-options --allow-two-primaries=no r0 |
|
|
|
drbdsetup show all |
|
drbdsetup show all --show-defaults |
|
|
|
== virt-install stuff |
|
* Get a list of --os-variants: 'osinfo-query os' |
|
* virt-install --print-xml (or --transient) |
|
* Migrate; |
|
# For all resources under the server; |
|
#drbdadm net-options r0 --allow-two-primaries=yes |
|
|
|
drbdsetup net-options srv01-c7_0 2 --_name=m3-a02n01.alteeve.com --csums-alg=md5 --data-integrity-alg=md5 --after-sb-0pri=discard-zero-changes --after-sb-1pri=discard-secondary --after-sb-2pri=disconnect --protocol=C --fencing=resource-and-stonith --allow-two-primaries=yes |
|
drbdsetup net-options srv01-c7_0 2 --_name=m3-a02n01.alteeve.com --csums-alg=md5 --data-integrity-alg=md5 --after-sb-0pri=discard-zero-changes --after-sb-1pri=discard-secondary --after-sb-2pri=disconnect --protocol=C --fencing=resource-and-stonith --allow-two-primaries=yes |
|
|
|
# Migrate: |
|
|
|
virsh -c qemu+ssh://root@m3-a02n02.alteeve.com/system list |
|
|
|
virsh migrate --unsafe --undefinesource --live srv01-c7 qemu+ssh://m3-a02n01.alteeve.com/system |
|
virsh -c qemu+ssh://root@m3-a02n02.alteeve.com/system migrate --undefinesource --live srv01-c7 qemu+ssh://m3-a02n01.alteeve.com/system |
|
|
|
# Again for all resources under the server; |
|
drbdadm net-options r0 --allow-two-primaries=no |
|
|
|
drbdsetup net-options <resource> <target_node_id> --_name=<target_node_name> --allow-two-primaries=yes |
|
virsh migrate --undefinesource --live <server> qemu+ssh://<target_node>/system |
|
drbdsetup net-options <resource> <target_node_id> --_name=<target_node_name> --allow-two-primaries=no |
|
|
|
pcs constraint list --full |
|
Location Constraints: |
|
Resource: srv01-c7 |
|
Enabled on: m3-a02n02.alteeve.com (score:50) (id:location-srv01-c7-m3-a02n02.alteeve.com-50) |
|
pcs constraint remove location-srv01-c7-m3-a02n02.alteeve.com-50 |
|
|
|
|
|
|
|
Set to 90% of BCN bandwidth |
|
migrate-setspeed domain bandwidth |
|
Set the maximum migration bandwidth (in MiB/s) for a domain which is being migrated to another host. bandwidth is interpreted as an |
|
unsigned long long value. Specifying a negative value results in an essentially unlimited value being provided to the hypervisor. The |
|
hypervisor can choose whether to reject the value or convert it to the maximum value allowed. |
|
|
|
migrate-getspeed domain |
|
Get the maximum migration bandwidth (in MiB/s) for a domain. |
|
|
|
|
|
==== /etc/drbd.d/global_common.conf |
|
# DRBD is the result of over a decade of development by LINBIT. |
|
# In case you need professional services for DRBD or have |
|
# feature requests visit http://www.linbit.com |
|
|
|
global { |
|
usage-count yes; |
|
|
|
# Decide what kind of udev symlinks you want for "implicit" volumes |
|
# (those without explicit volume <vnr> {} block, implied vnr=0): |
|
# /dev/drbd/by-resource/<resource>/<vnr> (explicit volumes) |
|
# /dev/drbd/by-resource/<resource> (default for implict) |
|
udev-always-use-vnr; # treat implicit the same as explicit volumes |
|
|
|
# minor-count dialog-refresh disable-ip-verification |
|
# cmd-timeout-short 5; cmd-timeout-medium 121; cmd-timeout-long 600; |
|
} |
|
|
|
common { |
|
handlers { |
|
# These are EXAMPLE handlers only. |
|
# They may have severe implications, |
|
# like hard resetting the node under certain circumstances. |
|
# Be careful when choosing your poison. |
|
|
|
# pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f"; |
|
# pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f"; |
|
# local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f"; |
|
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh"; |
|
# split-brain "/usr/lib/drbd/notify-split-brain.sh root"; |
|
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root"; |
|
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k"; |
|
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh; |
|
# quorum-lost "/usr/lib/drbd/notify-quorum-lost.sh root"; |
|
fence-peer "/usr/sbin/fence_pacemaker"; |
|
} |
|
|
|
startup { |
|
# wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb |
|
} |
|
|
|
options { |
|
# cpu-mask on-no-data-accessible |
|
|
|
# RECOMMENDED for three or more storage nodes with DRBD 9: |
|
# quorum majority; |
|
# on-no-quorum suspend-io | io-error; |
|
auto-promote yes; |
|
} |
|
|
|
disk { |
|
# size on-io-error fencing disk-barrier disk-flushes |
|
# disk-drain md-flushes resync-rate resync-after al-extents |
|
# c-plan-ahead c-delay-target c-fill-target c-max-rate |
|
# c-min-rate disk-timeout |
|
disk-flushes no; |
|
md-flushes no; |
|
} |
|
|
|
net { |
|
# protocol timeout max-epoch-size max-buffers |
|
# connect-int ping-int sndbuf-size rcvbuf-size ko-count |
|
# allow-two-primaries cram-hmac-alg shared-secret after-sb-0pri |
|
# after-sb-1pri after-sb-2pri always-asbp rr-conflict |
|
# ping-timeout data-integrity-alg tcp-cork on-congestion |
|
# congestion-fill congestion-extents csums-alg verify-alg |
|
# use-rle |
|
|
|
# This computes an md5 sum of the block before replicating/synchronizing and skips if it matches already. |
|
# This can help with increasing replication/sync speed in some cases, but at the cost of CPU time. We may |
|
# disable this (or make it user-changeable). |
|
csums-alg md5; |
|
|
|
# Use md5 sums to verify replicated data. More CPU overhead, but safer. |
|
data-integrity-alg md5; |
|
|
|
# We'll override this just before a migration as needed. |
|
allow-two-primaries no; |
|
|
|
# Traditional split-brain handling. |
|
after-sb-0pri discard-zero-changes; |
|
after-sb-1pri discard-secondary; |
|
after-sb-2pri disconnect; |
|
} |
|
} |
|
==== |
|
|
|
==== cat /etc/drbd.d/srv01-c7_0.res |
|
# Server srv01-c7 Disk 0 |
|
resource srv01-c7_0 { |
|
device /dev/drbd0; |
|
meta-disk internal; |
|
|
|
on m3-a02n01.alteeve.com { |
|
node-id 0; |
|
disk /dev/node01_vg0/srv01-c7; |
|
} |
|
on m3-a02n02.alteeve.com { |
|
node-id 1; |
|
disk /dev/node02_vg0/srv01-c7; |
|
} |
|
on m3-a02dr01.alteeve.com { |
|
node-id 2; |
|
disk /dev/dr01_vg0/srv01-c7; |
|
} |
|
|
|
connection { |
|
host m3-a02n01.alteeve.com address 10.41.20.1:7788; |
|
host m3-a02n02.alteeve.com address 10.41.20.2:7788; |
|
net { |
|
protocol C; |
|
fencing resource-and-stonith; |
|
} |
|
} |
|
connection { |
|
host m3-a02n01.alteeve.com address 10.41.20.1:7789; |
|
host m3-a02dr01.alteeve.com address 10.41.20.3:7789; |
|
net { |
|
protocol A; |
|
fencing dont-care; |
|
} |
|
} |
|
connection { |
|
host m3-a02n02.alteeve.com address 10.41.20.2:7790; |
|
host m3-a02dr01.alteeve.com address 10.41.20.3:7790; |
|
net { |
|
protocol A; |
|
fencing dont-care; |
|
} |
|
} |
|
} |
|
|
|
==== |
|
|
|
# Provision servers |
|
mkdir /mnt/anvil/{provision,files,archive,definitions} |
|
|
|
pcs resource create srv01-c7 ocf:alteeve:server hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10" op stop timeout="60" on-fail="block" meta allow-migrate="true" failure-timeout="75" |
|
pcs resource create srv01-c7 ocf:alteeve:server hypervisor="qemu:///system" config="/mnt/anvil/definitions/srv01-c7.xml" meta allow-migrate="true" op monitor interval="10" op on-fail="block" meta allow-migrate="true" failure-timeout="75" |
|
pcs resource create srv01-c7 ocf:alteeve:server name="srv01-c7" meta allow-migrate="true" op monitor interval="10" op stop on-fail="block" meta allow-migrate="true" failure-timeout="75" |
|
|
|
pcs resource create srv01-c7 ocf:alteeve:server name="srv01-c7" meta allow-migrate="true" op monitor interval="60" op stop on-fail="block" op migrate_to on-fail="block" op migrate_from on-fail="block" meta allow-migrate="true" failure-timeout="75" |
|
|
|
|
|
== Resource Agent; https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc |
|
|
|
* A resource agent receives all configuration information about the resource it manages via environment variables. The names of these environment variables are always the name of the resource parameter, prefixed with OCF_RESKEY_. For example, if the resource has an ip parameter set to 192.168.1.1, then the resource agent will have access to an environment variable OCF_RESKEY_ip holding that value. |
|
|
|
|
|
=== |
|
|
|
When stopping a server; |
|
14:03 < lge> "on-fail: block" |
|
14:03 < lge> is per operation type. |
|
14:08 < lge> anyways, you can also "on-fail: retry" |
|
|
|
OK, set the stop timeout to 60, set 'on-fail: block' and set the failure-timeout to 60 and see how pacemaker reacts. |
|
failure-timeout |
|
|
|
=== |
|
|
|
Migrate servers; |
|
|
|
- Let ScanCore set 'node-health' attribute (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-node-health) |
|
- Set 'migration-limit' to '1' to enforce serial live migration (http://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html#s-cluster-options). |
|
|
|
Migrate a single server by setting a location constraint against the node we want the VM off of. |
|
- If anything goes wrong, the server will enter a blocked state in pacemaker. |
|
- Recovery needs to be 'unmanage -> clean' to avoid a stop call. |
|
11:57 <@kgaillot> for your design, that sounds right. between cleanup and manage, i'd make sure there was a PE run without any pending actions blocked by the unmanaging -- you can either look at the logs on the DC, run "crm_simulate -SL", or just check the status for a bit |
|
11:58 <@kgaillot> you can play around with it by putting a higher preference on the to-be-cleaned node, to make sure it *does* move when you re-manage. that way you can see what logs/simulate/status look like |
|
|
|
12:07 <@kgaillot> i'm thinking if you do crm_resource --reprobe instead of cleanup in the above sequence, that should prevent anything unexpected |
|
12:07 <@kgaillot> unmanage -> adjust preferences if needed -> reprobe resource -> wait for probe results to come back in, and if status looks good -> re-manage |
|
12:08 <@kgaillot> the reprobe will wipe the entire resource history and fail counts for the resource, causing pacemaker to recheck the current status on all nodes. if the status then shows the resource running where you expect/want it, with no errors, then it's not going to do anything further |
|
12:09 <@kgaillot> (in 2.0, cleanup only erases the history where the resource has failed, while reprobe erases the history regardless) |
|
12:13 <@kgaillot> if there are no failures in the resource history, there should be no risk of a full stop. if there is no resource history at all, then after reprobe, there should be no risk of any actions (assuming you've set up location preferences and stickiness how you want them) |
|
|
|
Recover from a failed migration; |
|
|
|
reset location to prefer current host -> unmanage resource -> cleanup resource -> manage resource |
|
|
|
(running on node 2, so re-add location constraint - basically, make sure location constraint favours current host) |
|
|
|
pcs resource unmanage srv01-c7 |
|
pcs constraint remove location-srv01-c7-m3-a02n02.alteeve.com-50 |
|
crm_resource --reprobe |
|
pcs resource manage srv01-c7 |
|
|
|
|
|
https://pykickstart.readthedocs.io/en/latest/kickstart-docs.html#chapter-1-introduction |
|
|
|
==== Sample kickstart for Fedora28 netinstall |
|
#version=DEVEL |
|
ignoredisk --only-use=vda |
|
# Partition clearing information |
|
clearpart --none --initlabel |
|
# Use graphical install |
|
graphical |
|
# Use network installation |
|
url --url="http://10.1.4.1/fedora28/x86_64/iso/" |
|
# Keyboard layouts |
|
keyboard --vckeymap=us --xlayouts='us' |
|
# System language |
|
lang en_CA.UTF-8 |
|
|
|
# Network information |
|
network --bootproto=dhcp --device=ens3 --ipv6=auto --activate |
|
network --hostname=localhost.localdomain |
|
# Root password |
|
rootpw --iscrypted $6$fyAht.3wBVlRGgqG$5dqIv2NrBD87uA51fxuoic/t2G93pXPUjVlh27Avg20ZGY409SK8cMVgABswF.krJSVIyoHfIChXNfpP/qTjI1 |
|
# Run the Setup Agent on first boot |
|
firstboot --enable |
|
# Do not configure the X Window System |
|
skipx |
|
# System services |
|
services --enabled="chronyd" |
|
# System timezone |
|
timezone Etc/GMT --isUtc |
|
# System bootloader configuration |
|
bootloader --location=mbr --boot-drive=vda |
|
|
|
%packages |
|
@^server-product-environment |
|
|
|
%end |
|
|
|
%addon com_redhat_kdump --disable --reserve-mb='128' |
|
|
|
%end |
|
|
|
%anaconda |
|
pwpolicy root --minlen=6 --minquality=1 --notstrict --nochanges --notempty |
|
pwpolicy user --minlen=6 --minquality=1 --notstrict --nochanges --emptyok |
|
pwpolicy luks --minlen=6 --minquality=1 --notstrict --nochanges --notempty |
|
%end |
|
==== |
|
|
|
==== M3 Striker Kickstart |
|
# Section 1 |
|
# Command section – Refer to Chapter 2 for a list of kickstart options. You must include the required options. |
|
|
|
|
|
### NOTE: The %packages, %pre, %pre-install, %post, %onerror, and %traceback sections are all required to be closed with %end |
|
# Section 2 |
|
# The %packages section – Refer to Chapter 3 for details. |
|
|
|
|
|
# Section 3: |
|
# The %pre, %pre-install, %post, %onerror, and %traceback sections – These sections can be in any order and are not required. Refer to Chapter 4, Chapter 5, and Chapter 6 for details. |
|
==== |
|
|
|
0 root@pulsar:/var/lib/libvirt/images# dev_PATH=$(udevadm info /dev/sdb | grep -e ID_PATH=) |
|
0 root@pulsar:/var/lib/libvirt/images# if [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi |
|
USB drive |
|
0 root@pulsar:/var/lib/libvirt/images# dev_PATH=$(udevadm info /dev/sda | grep -e ID_PATH=) |
|
0 root@pulsar:/var/lib/libvirt/images# if [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi |
|
SATA drive |
|
0 root@pulsar:/var/lib/libvirt/images# dev_PATH=$(udevadm info /dev/nvme0n1 | grep -e ID_PATH=) |
|
0 root@pulsar:/var/lib/libvirt/images# if [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi |
|
NVMe drive |
|
[root@localhost ~]# dev_PATH=$(udevadm info /dev/sda | grep -e ID_PATH=) |
|
[root@localhost ~]# if [[ $dev_PATH == *"usb"* ]]; then echo "USB drive"; elif [[ $dev_PATH == *"nvme"* ]]; then echo "NVMe drive"; elif [[ $dev_PATH == *"ata"* ]]; then echo "SATA drive"; elif [[ $dev_PATH == *"scsi"* ]]; then echo "SCSI drive"; fi |
|
SCSI drive |
|
|
|
|
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/model |
|
Flash Disk |
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/model |
|
SanDisk SDSSDXPS |
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/model |
|
INTEL SSDPEKKW512G7 |
|
|
|
### Stuff only NVMe has |
|
1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/subsysnqn |
|
nqn.2014.08.org.nvmexpress:80868086BTPY63650FPG512F INTEL SSDPEKKW512G7 |
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/subsysnqn |
|
cat: /sys/class/block/sda/device/subsysnqn: No such file or directory |
|
1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/subsysnqn |
|
cat: /sys/class/block/sdb/device/subsysnqn: No such file or directory |
|
|
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/serial |
|
BTPY63650FPG512F |
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/serial |
|
cat: /sys/class/block/sda/device/serial: No such file or directory |
|
1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/serial |
|
cat: /sys/class/block/sdb/device/serial: No such file or directory |
|
|
|
1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/nvme0n1/device/transport |
|
pcie |
|
0 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sda/device/transport |
|
cat: /sys/class/block/sda/device/transport: No such file or directory |
|
1 root@pulsar:/var/lib/libvirt/images# cat /sys/class/block/sdb/device/transport |
|
cat: /sys/class/block/sdb/device/transport: No such file or directory |
|
|
|
|
|
|
|
Disk size: |
|
/sys/class/block/sda/size * 512 (sysfs always reports 'size' in 512-byte sectors, regardless of the device's logical block size)
|
|
|