IPMI Health Checks and reporting to UptimeKUma

Script to do a health check on IPMI and send alerts to UptimeKuma.

#!/usr/bin/env bash
set -euo pipefail

####### CONFIGURATION #######
INV_FILE="ipmi_hosts.csv"
BASE_OUT="output"
SSH_OPTS="-n -o HostKeyAlgorithms=+ssh-rsa -o StrictHostKeyChecking=no"
UptimeKuma_URL="https://site/api/push/key"
#############################

send_alert() {
  local status=$1 msg=$2
  # URL-encode spaces
  local msg_enc
  msg_enc=$(printf '%s' "$msg" | sed 's/ /%20/g')
  curl -s "${UptimeKuma_URL}?status=${status}&msg=${msg_enc}" >/dev/null
}

fetch_health() {
  local IP=$1 USER=$2 PASS=$3 ALIAS=$4
  local OUTDIR="$BASE_OUT/$ALIAS"
  mkdir -p "$OUTDIR/details"

  echo "=== [$ALIAS] ($IP): fetching summary ==="
  sshpass -p "$PASS" ssh $SSH_OPTS \
    "$USER@$IP" "syshealth summary" \
    > "$OUTDIR/summary.txt"

  # Normalize line endings (remove any CRs)
  sed -i 's/\r$//' "$OUTDIR/summary.txt"

  # 1) Find any line with at least 3 equal signs
  local start
  start=$(grep -n -E '={3,}' "$OUTDIR/summary.txt" | head -n1 | cut -d: -f1 || true)

  if [[ -z "$start" ]]; then
    echo "  ! ERROR: could not find table delimiter in summary.txt"
    send_alert down "$ALIAS parse_error"
    return
  fi

  # 2) Data starts just after that line
  local data_line=$(( start + 1 ))
  local summary_tail
  summary_tail=$(tail -n +"$data_line" "$OUTDIR/summary.txt")

  local issues_str=""

  echo "  → parsing for Warning/Critical…"
  while IFS= read -r raw; do
    [[ -z "${raw// }" ]] && continue
    [[ "$raw" =~ ^system\> ]] && continue

    local line="${raw#"${raw%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"

    local status="${line##* }"
    local component="${line% $status}"
    component="${component#"${component%%[![:space:]]*}"}"
    component="${component%"${component##*[![:space:]]}"}"

    if [[ "$status" =~ ^(Warning|Critical)$ ]] && [[ "$component" != "System" ]]; then
      issues_str+="${component}:${status},"

      case "$component" in
        "Cooling Devices") module="cooling"    ;;
        "Power Modules")   module="power"      ;;
        "Local Storage")   module="storage"    ;;
        "Processors")      module="processors" ;;
        "Memory")          module="memory"     ;;
        *) echo "    • no subcommand for $component"; continue ;;
      esac

      echo "    -> $component is $status; fetching details…"
      sshpass -p "$PASS" ssh $SSH_OPTS \
        "$USER@$IP" "syshealth $module" \
        > "$OUTDIR/details/${module}.txt"
      echo "       saved to $OUTDIR/details/${module}.txt"
    fi
  done <<< "$summary_tail"

  issues_str="${issues_str%,}"

  if [[ -n "$issues_str" ]]; then
    send_alert down "$ALIAS $issues_str"
    echo "=== done for [$ALIAS] (down): $issues_str ==="
  else
    send_alert up "$ALIAS OK"
    echo "=== done for [$ALIAS] (up) ==="
  fi

  echo
}

# MAIN LOOP
while IFS=, read -r IP USER PASS ALIAS; do
  [[ -z "$IP" || "$IP" == \#* ]] && continue
  fetch_health "$IP" "$USER" "$PASS" "$ALIAS"
done < "$INV_FILE"