#!/bin/bash
#
# Check the health of every node in a ZFS-backed PVE cluster and push the
# result to an Uptime Kuma push monitor.
#
# For each node the script SSHes in as root and collects:
#   * `systemctl is-active` for the PVE / corosync services,
#   * `zpool list` health for every pool,
#   * HA migration messages logged by pve-ha-lrm in the last 5 minutes.
# Any finding is reported as status=down with a '+'-joined list of issue
# tags; otherwise status=up is pushed.
#
# Environment:
#   PUSH_URL  Base push URL (default: https://site/api/push/key).
#
# NOTE: `set -e` / `pipefail` are deliberately not enabled: several commands
# here (grep, ssh running a remote grep, curl) legitimately return non-zero.
set -u

readonly NODES=("node1" "node2" "node3")
readonly PUSH_URL="${PUSH_URL:-https://site/api/push/key}"
readonly SSH_CONNECT_TIMEOUT=5
readonly CURL_MAX_TIME=10

# Script executed on each node. Each section is introduced by a
# ===MARKER=== line so the output can be split apart locally.
REMOTE_CHECKS=$(cat <<'EOF'
echo "===SERVICE_STATUS==="
systemctl is-active pve-cluster corosync pvedaemon pveproxy pvestatd pve-ha-lrm pve-ha-crm
echo "===ZFS_STATUS==="
zpool list -H -o name,health
echo "===HA_MIGRATIONS==="
journalctl -u pve-ha-lrm --since "5 min ago" -o cat | grep -i migrate
EOF
)
readonly REMOTE_CHECKS

#######################################
# Print the lines that follow the ===<name>=== marker, up to the next marker.
# Arguments: $1 - section name (e.g. ZFS_STATUS)
#            $2 - full remote output
# Outputs:   the section body on stdout
#######################################
extract_section() {
  local name=$1 text=$2
  # Only exact marker lines delimit sections, so payload lines that merely
  # contain "===" cannot cut a section short.
  awk -v marker="===${name}===" '
    $0 == marker       { in_section = 1; next }
    /^===[A-Z_]+===$/  { in_section = 0 }
    in_section
  ' <<<"$text"
}

#######################################
# Send a heartbeat to the push monitor.
# Arguments: $1 - status ("up" or "down")
#            $2 - message text
# Returns:   curl's exit status; a failure is reported on stderr only so
#            the remaining nodes are still checked.
#######################################
push_status() {
  local status=$1 msg=$2
  curl -fsS --max-time "$CURL_MAX_TIME" -o /dev/null \
    "${PUSH_URL}?status=${status}&msg=${msg}" \
    || printf 'WARNING: push of status=%s msg=%s failed\n' "$status" "$msg" >&2
}

#######################################
# Run all checks against one node and push the outcome.
# Arguments: $1 - node host name
#######################################
check_node() {
  local node=$1
  local issues=()
  local output ssh_rc service_block zfs_block migration_block msg

  echo "Checking node: $node"

  output=$(ssh -o ConnectTimeout="$SSH_CONNECT_TIMEOUT" "root@${node}" bash \
    <<<"$REMOTE_CHECKS")
  ssh_rc=$?

  # ssh itself exits 255 on connection/authentication errors; any other
  # status is just the exit code of the remote script's final grep.
  if ((ssh_rc == 255)) || [[ -z "$output" ]]; then
    echo "SSH to $node failed or returned no output"
    push_status down "$node:ssh-unreachable"
    echo "------"
    return
  fi

  service_block=$(extract_section SERVICE_STATUS "$output")
  zfs_block=$(extract_section ZFS_STATUS "$output")
  migration_block=$(extract_section HA_MIGRATIONS "$output")

  # -x: the whole line must be "active". A substring match would also
  # accept "inactive", "activating" and "deactivating".
  if grep -qvx 'active' <<<"$service_block"; then
    echo "Service issue(s) on $node:"
    printf '%s\n' "$service_block"
    issues+=("service-issue")
  fi

  # Every pool line must report ONLINE in its second column; an empty
  # section (no pools listed) is treated as an issue as well.
  if ! awk '$2 != "ONLINE" { exit 1 }' <<<"$zfs_block"; then
    echo "ZFS issue(s) on $node:"
    printf '%s\n' "$zfs_block"
    issues+=("zfs-issue")
  fi

  if [[ -n "$migration_block" ]]; then
    echo "Recent HA migration(s) on $node:"
    printf '%s\n' "$migration_block"
    issues+=("recent-migration")
  fi

  if ((${#issues[@]} > 0)); then
    # Join tags with '+' inside a subshell so IFS is not changed globally.
    msg=$(
      IFS='+'
      printf '%s' "${issues[*]}"
    )
    echo "Issues on $node: ${issues[*]}"
    push_status down "$node:$msg"
  else
    echo "All checks passed on $node. Sending UP signal."
    push_status up "$node"
  fi
  echo "------"
}

main() {
  local node
  for node in "${NODES[@]}"; do
    check_node "$node"
  done
}

main "$@"