#!/bin/bash
# proxmox-kms-bridge — CLI for 45Drives SecureVM for Proxmox
# Part of proxmox-kms-bridge
#
# Usage: proxmox-kms-bridge <command> [args]
#
# Commands:
#   status                 Show QxVault (OpenBao) connectivity and encrypted VM summary
#   verify <VMID>          Test key retrieval for a VM without unlocking
#   revert <VMID>          Revert a VM to its original unencrypted disk configuration
#   cleanup [--dry-run|-n] Remove orphaned LUKS images for deleted VMs
#   help                   Show usage
# Strict mode: stop on failing commands, unset variables and failed
# pipeline stages.
set -euo pipefail

# Site configuration; sourced further down when the file exists.
CONF_FILE='/etc/qxvault/qxvault.conf'

# ── Defaults ───────────────────────────────────────────────────────
# Built-in values. They are assigned unconditionally (environment values
# are overwritten) and may then be reassigned by CONF_FILE.

# Vault endpoint and TLS/timeout settings consumed by vault_curl.
OPENBAO_ADDR='https://127.0.0.1:8200'
OPENBAO_CACERT=''
OPENBAO_SKIP_VERIFY='false'
OPENBAO_TIMEOUT='10'

# Location and naming of per-VM LUKS images and mapper devices; the
# literal "{vmid}" placeholder is substituted by the commands below.
LUKS_IMAGE_DIR='/var/lib/vz/images'
LUKS_IMAGE_TEMPLATE='vm-{vmid}-disk-qxvault.luks'
LUKS_MAPPER_TEMPLATE='vm{vmid}_crypt'

if [[ -f "$CONF_FILE" ]]; then
  # shellcheck source=/dev/null
  source "$CONF_FILE"
fi

# ── Helpers ────────────────────────────────────────────────────────
# Write an informational message, prefixed with the tool name, to stdout.
# Arguments: message words (joined with spaces)
log() {
  printf '%s\n' "[proxmox-kms-bridge] $*"
}
# Write an error message, prefixed with the tool name, to stderr and
# terminate the script with exit status 1.
# Arguments: message words (joined with spaces)
die() {
  printf '%s\n' "[proxmox-kms-bridge] ERROR: $*" 1>&2
  exit 1
}

# Invoke curl with the shared vault connection options, followed by the
# caller's own arguments.
# Globals:   OPENBAO_TIMEOUT     (read) connect timeout; --max-time is twice this
#            OPENBAO_CACERT      (read) passed as --cacert when non-empty
#            OPENBAO_SKIP_VERIFY (read) adds -k when exactly "true"
# Arguments: any additional curl arguments (URL, -o, -w, ...)
# Returns:   curl's exit status
vault_curl() {
  local -a opts
  opts=( -s )
  opts+=( --connect-timeout "$OPENBAO_TIMEOUT" )
  opts+=( --max-time "$(( OPENBAO_TIMEOUT * 2 ))" )
  [[ -z "$OPENBAO_CACERT" ]] || opts+=( --cacert "$OPENBAO_CACERT" )
  [[ "$OPENBAO_SKIP_VERIFY" != "true" ]] || opts+=( -k )
  curl "${opts[@]}" "$@"
}

# Print the command summary to stdout, then terminate the script with
# exit status 0.
usage() {
  local -a help_lines=(
    '45Drives SecureVM for Proxmox — Built by 45Drives in partnership with Crypto4A'
    ''
    'Usage: proxmox-kms-bridge <command> [args]'
    ''
    'Commands:'
    '  status           Show QxVault connectivity and encrypted VM summary'
    '  verify <VMID>    Test key retrieval for a VM (does not unlock)'
    '  revert <VMID>    Decrypt and revert a VM to unencrypted state'
    '  cleanup          Remove orphaned LUKS images for deleted VMs'
    '  help             Show this help'
    ''
  )
  printf '%s\n' "${help_lines[@]}"
  exit 0
}

# ── status command ─────────────────────────────────────────────────
# Print a status report on stdout with four parts: vault health, config
# file presence, a table of VMs whose Proxmox config mentions the
# "qxvault-luks-hook" hookscript, and a list of LUKS images whose VMID is
# not known to `qm status` ("orphans").
# Globals:   OPENBAO_ADDR, CONF_FILE, LUKS_IMAGE_DIR, LUKS_IMAGE_TEMPLATE,
#            LUKS_MAPPER_TEMPLATE (all read)
# Arguments: none (any arguments are ignored)
# Outputs:   the report, on stdout
cmd_status() {
  echo "45Drives SecureVM for Proxmox — Status"
  echo "======================================="
  echo

  # Vault connectivity: only the HTTP status code of /v1/sys/health is
  # captured; a curl failure is tolerated so the report still prints.
  echo "QxVault Endpoint: ${OPENBAO_ADDR}"
  local health_code
  health_code=$(vault_curl -o /dev/null -w '%{http_code}' "${OPENBAO_ADDR}/v1/sys/health" 2>/dev/null) || true
  case "$health_code" in
    200) echo "QxVault Status:   Connected (unsealed)" ;;
    429) echo "QxVault Status:   Connected (standby)" ;;
    472) echo "QxVault Status:   Connected (DR secondary)" ;;
    473) echo "QxVault Status:   Connected (performance standby)" ;;
    501) echo "QxVault Status:   Connected (NOT INITIALIZED)" ;;
    503) echo "QxVault Status:   Connected (SEALED)" ;;
    *)   echo "QxVault Status:   UNREACHABLE (HTTP ${health_code:-000})" ;;
  esac

  # Config
  echo "Config File:      ${CONF_FILE}"
  if [ -f "$CONF_FILE" ]; then
    echo "Config Status:    Present"
  else
    echo "Config Status:    MISSING"
  fi
  echo

  # Scan for encrypted VMs
  echo "Encrypted VMs:"
  echo "──────────────────────────────────────────────────────────"
  printf "%-8s %-12s %-10s %s\n" "VMID" "LUKS State" "Hookscript" "Image"
  echo "──────────────────────────────────────────────────────────"

  local found=0
  local vmid conf_file mapper image luks_state hook_state
  # VMIDs come from the first column of `qm list` (header row skipped).
  while IFS= read -r vmid; do
    [ -n "$vmid" ] || continue
    conf_file="/etc/pve/qemu-server/${vmid}.conf"
    [ -f "$conf_file" ] || continue
    if grep -q "qxvault-luks-hook" "$conf_file" 2>/dev/null; then
      mapper="${LUKS_MAPPER_TEMPLATE//\{vmid\}/$vmid}"
      image="${LUKS_IMAGE_DIR}/${vmid}/${LUKS_IMAGE_TEMPLATE//\{vmid\}/$vmid}"
      luks_state="closed"
      # Only VMs whose config references the hook reach this point.
      hook_state="attached"

      if cryptsetup status "$mapper" >/dev/null 2>&1; then
        luks_state="OPEN"
      fi

      # A missing image takes precedence over the mapper state.
      if [ ! -f "$image" ]; then
        luks_state="NO IMAGE"
      fi

      printf "%-8s %-12s %-10s %s\n" "$vmid" "$luks_state" "$hook_state" "$image"
      found=$((found + 1))
    fi
  done < <(qm list 2>/dev/null | awk 'NR>1 {print $1}')

  if [ "$found" -eq 0 ]; then
    echo "  (none found)"
  fi
  echo "──────────────────────────────────────────────────────────"
  echo "Total encrypted VMs: $found"

  # Scan for orphaned LUKS images (VMs that no longer exist).
  # All loop variables are local so nothing leaks into the caller's scope.
  local orphan_count=0
  local -a orphan_rows=()
  local -a luks_files=()
  local f luks_file ovmid osize osize_h
  while IFS= read -r -d '' f; do
    luks_files+=("$f")
  done < <(find "$LUKS_IMAGE_DIR" -name 'vm-*-disk-qxvault.luks' -print0 2>/dev/null)

  # The ${arr[@]+...} form keeps an empty array from tripping `set -u`
  # on bash releases older than 4.4.
  for luks_file in ${luks_files[@]+"${luks_files[@]}"}; do
    # Extract the VMID from the file name; skip names that do not match.
    [[ "${luks_file##*/}" =~ ^vm-([0-9]+)-disk-qxvault\.luks$ ]] || continue
    ovmid="${BASH_REMATCH[1]}"
    if ! qm status "$ovmid" >/dev/null 2>&1; then
      orphan_count=$((orphan_count + 1))
      osize=$(stat -c%s "$luks_file" 2>/dev/null || echo 0)
      # Fall back to the raw byte count if numfmt is unavailable or fails.
      osize_h=$(numfmt --to=iec "$osize" 2>/dev/null) || osize_h="$osize"
      orphan_rows+=("$(printf '%-8s %s (%s)' "$ovmid" "$luks_file" "$osize_h")")
    fi
  done

  echo
  if [ "$orphan_count" -gt 0 ]; then
    echo "Orphaned LUKS Images: $orphan_count"
    echo "──────────────────────────────────────────────────────────"
    printf "%-8s %s\n" "VMID" "Image (Size)"
    echo "──────────────────────────────────────────────────────────"
    # Rows are printed verbatim (no escape interpretation), so paths that
    # contain backslashes are shown as they are on disk.
    printf '%s\n' "${orphan_rows[@]}"
    echo
    echo "Run 'proxmox-kms-bridge cleanup' to remove."
  else
    echo "Orphaned LUKS Images: 0"
  fi
}

# ── verify command ─────────────────────────────────────────────────
# Check that the key for one VM can be fetched, and report the state of
# its 2FA config, LUKS image and mapper. Nothing is unlocked or modified.
# Globals:   LUKS_IMAGE_DIR, LUKS_IMAGE_TEMPLATE, LUKS_MAPPER_TEMPLATE (read)
# Arguments: $1 - numeric VMID
# Outputs:   report on stdout
# Exits:     1 (via die) on a missing/non-numeric VMID, 1 when the key
#            helper returns a non-zero status
cmd_verify() {
  local vmid="${1:-}"
  [ -n "$vmid" ] || die "Usage: proxmox-kms-bridge verify <VMID>"
  [[ "$vmid" =~ ^[0-9]+$ ]] || die "VMID must be numeric"

  echo "Verifying key retrieval for VMID ${vmid}..."

  # 2FA status
  local tfa_conf="/etc/qxvault/2fa/${vmid}.conf"
  if [ -f "$tfa_conf" ]; then
    # Declared local (seeded with any existing value) so that a TFA_MODE
    # assignment made by the sourced file stays scoped to this function.
    local TFA_MODE="${TFA_MODE:-}"
    # shellcheck source=/dev/null
    . "$tfa_conf"
    echo "  2FA:           ENABLED (mode: ${TFA_MODE:-unknown})"
    echo "  Auto-unlock:   BLOCKED (manual unlock required)"
  else
    echo "  2FA:           disabled"
    echo "  Auto-unlock:   enabled"
  fi

  # Test key retrieval. The helper's stderr goes to a private temp file so
  # diagnostics never get mixed into the captured key text; they are shown
  # only when the helper fails.
  local key_b64 key_len key_err_file
  key_err_file=$(mktemp) || die "Cannot create temporary file for key helper diagnostics"
  if key_b64=$(/usr/libexec/vault-dmkey get-key --name "$vmid" 2>"$key_err_file"); then
    rm -f -- "$key_err_file"
    # The decode pipeline is evaluated as a condition: an undecodable
    # value is reported instead of aborting the script under pipefail.
    if [ -n "$key_b64" ] \
      && key_len=$(printf '%s\n' "$key_b64" | base64 -d 2>/dev/null | wc -c); then
      echo "  Key retrieval: OK"
      echo "  Key length:    ${key_len} bytes"
    else
      echo "  Key retrieval: OK (but could not parse output)"
    fi
  else
    echo "  Key retrieval: FAILED"
    # Show the helper's diagnostics, then anything it wrote to stdout.
    {
      cat -- "$key_err_file"
      [ -z "$key_b64" ] || printf '%s\n' "$key_b64"
    } | sed 's/^/    /'
    rm -f -- "$key_err_file"
    exit 1
  fi

  # Check LUKS image
  local image="${LUKS_IMAGE_DIR}/${vmid}/${LUKS_IMAGE_TEMPLATE//\{vmid\}/$vmid}"
  if [ -f "$image" ]; then
    echo "  LUKS image:    ${image} (exists)"
    local luks_info
    # Capture the full dump first and trim afterwards: cutting the pipe
    # short with head could fail the pipeline under pipefail.
    if luks_info=$(cryptsetup luksDump "$image" 2>/dev/null); then
      printf '%s\n' "$luks_info" | sed -n '1,5s/^/    /p'
    fi
  else
    echo "  LUKS image:    ${image} (NOT FOUND)"
  fi

  # Check mapper state
  local mapper="${LUKS_MAPPER_TEMPLATE//\{vmid\}/$vmid}"
  if cryptsetup status "$mapper" >/dev/null 2>&1; then
    echo "  Mapper:        /dev/mapper/${mapper} (OPEN)"
  else
    echo "  Mapper:        /dev/mapper/${mapper} (closed)"
  fi

  echo "  Verification complete."
}

# ── revert command ─────────────────────────────────────────────────
# Revert one VM's Proxmox configuration to the disk recorded before
# encryption, then delete the VM's LUKS image.
# Globals:   LUKS_MAPPER_TEMPLATE, LUKS_IMAGE_DIR, LUKS_IMAGE_TEMPLATE (read)
# Arguments: $1 - numeric VMID
# Files:     reads  /root/qxvault-migrate-<VMID>/boot-slot.txt and disk-spec.txt
#            writes /run/qxvault-op-<VMID>.json (removed again on exit)
# Exits:     1 via die on any failed precondition; under `set -e` a failing
#            `qm set` or `rm` also aborts the script.
# NOTE(review): no step here copies data out of the LUKS image before it is
# deleted — presumably the disk named in disk-spec.txt still holds the
# original data; confirm before relying on this.
cmd_revert() {
  local vmid="${1:-}"
  [ -n "$vmid" ] || die "Usage: proxmox-kms-bridge revert <VMID>"
  [[ "$vmid" =~ ^[0-9]+$ ]] || die "VMID must be numeric"
  [ "$(id -u)" -eq 0 ] || die "Must run as root"

  # Derive per-VM names by substituting the literal "{vmid}" placeholder.
  local mapper="${LUKS_MAPPER_TEMPLATE//\{vmid\}/$vmid}"
  local image="${LUKS_IMAGE_DIR}/${vmid}/${LUKS_IMAGE_TEMPLATE//\{vmid\}/$vmid}"
  local state_dir="/root/qxvault-migrate-${vmid}"

  # Check VM is stopped. The check is skipped when `qm` is not on PATH,
  # and also passes when `qm status` itself fails.
  if command -v qm >/dev/null 2>&1 && qm status "$vmid" 2>/dev/null | grep -q running; then
    die "VM ${vmid} is running. Stop it first: qm stop ${vmid}"
  fi

  # Check mapper — close it if VM is stopped
  if cryptsetup status "$mapper" >/dev/null 2>&1; then
    log "Closing LUKS mapper $mapper..."
    
    # Try to close mapper, with retries for lingering file handles
    # (up to max_retries attempts, one second apart; die after the last).
    local retry=0
    local max_retries=5
    while [ $retry -lt $max_retries ]; do
      if cryptsetup close "$mapper" 2>/dev/null; then
        log "LUKS mapper closed successfully"
        break
      fi
      
      retry=$((retry + 1))
      if [ $retry -lt $max_retries ]; then
        log "Mapper still in use, waiting 1s before retry ($retry/$max_retries)..."
        sleep 1
      else
        die "LUKS mapper $mapper is still in use after $max_retries attempts. Try stopping the VM again or check for stuck processes."
      fi
    done
  fi

  [ -f "$image" ] || die "LUKS image not found: $image"

  # Check for saved rollback state
  if [ ! -d "$state_dir" ]; then
    die "No rollback state found at $state_dir. Cannot determine original disk."
  fi

  # Each file is optional at this point; a missing or empty one is
  # rejected by the two checks that follow.
  local boot_slot="" original_spec=""
  [ -f "${state_dir}/boot-slot.txt" ] && boot_slot=$(<"${state_dir}/boot-slot.txt")
  [ -f "${state_dir}/disk-spec.txt" ] && original_spec=$(<"${state_dir}/disk-spec.txt")

  [ -n "$boot_slot" ] || die "Cannot determine original boot slot from rollback state"
  [ -n "$original_spec" ] || die "Cannot determine original disk spec from rollback state"

  log "Reverting VMID ${vmid} to unencrypted state"
  log "  Original disk: ${boot_slot} = ${original_spec}"

  # Operation lock file (for UI progress tracking)
  # The EXIT trap set below replaces any EXIT trap installed earlier.
  local op_lock="/run/qxvault-op-${vmid}.json"
  echo "{\"op\":\"revert\",\"vmid\":\"${vmid}\",\"startTime\":$(date +%s)}" > "$op_lock"
  # shellcheck disable=SC2064  # Intentional: expand now because op_lock is local
  trap "rm -f '${op_lock}'" EXIT

  # Unlock VM if it was locked after encryption (failure is ignored)
  log "Unlocking VM ${vmid}..."
  qm unlock "$vmid" 2>/dev/null || true

  # Remove hookscript first — if this fails, the VM still has LUKS
  # and the hookscript, which is a safe (working) state.
  log "Removing hookscript..."
  qm set "$vmid" --delete hookscript

  # Restore original disk config: boot_slot becomes the option name
  # (e.g. "--<slot>") and original_spec its value.
  log "Restoring VM disk config..."
  qm set "$vmid" "--${boot_slot}" "$original_spec"

  # Remove LUKS image (reached only if both `qm set` calls succeeded)
  log "Removing LUKS image: ${image}"
  rm -f "$image"

  log "=========================================="
  log "  Revert complete for VMID ${vmid}"
  log "=========================================="
  log "  Disk restored to: ${original_spec}"
  log "  LUKS image removed"
  log "  Hookscript removed"
  log "  Note: Key still exists in QxVault (manual cleanup if needed)"
  log "=========================================="
}


# Delete LUKS images under LUKS_IMAGE_DIR whose VMID is unknown to
# `qm status`, closing a still-open mapper first and removing the image's
# directory when it ends up empty.
# Globals:   LUKS_IMAGE_DIR, LUKS_MAPPER_TEMPLATE (read)
# Arguments: $1 - optional "--dry-run" or "-n": report only, change nothing
# Outputs:   progress messages via log
# Exits:     1 (via die) on an unrecognised argument or when `qm` is
#            unavailable
cmd_cleanup() {
  local dry_run=false
  # Reject anything that is not a known flag: a mistyped "--dry-run" must
  # never fall through to a real deletion run.
  case "${1:-}" in
    "") ;;
    --dry-run|-n) dry_run=true ;;
    *) die "Unknown option for cleanup: ${1}. Usage: proxmox-kms-bridge cleanup [--dry-run|-n]" ;;
  esac

  # Orphan detection relies on `qm status` failing for a missing VM.
  # Without qm every lookup would fail and every image would look orphaned.
  command -v qm >/dev/null 2>&1 \
    || die "qm not found; cannot determine which VMs still exist"

  log "Scanning for orphaned LUKS images in ${LUKS_IMAGE_DIR}/..."

  local orphan_count=0
  local cleaned_count=0
  local -a luks_files=()
  local f luks_file vmid mapper vmdir

  # Collect all LUKS image files first (avoids set -e issues with find+while)
  while IFS= read -r -d '' f; do
    luks_files+=("$f")
  done < <(find "$LUKS_IMAGE_DIR" -name 'vm-*-disk-qxvault.luks' -print0 2>/dev/null)

  # The ${arr[@]+...} form keeps an empty array from tripping `set -u`
  # on bash releases older than 4.4.
  for luks_file in ${luks_files[@]+"${luks_files[@]}"}; do
    # Extract VMID from filename (e.g., vm-184-disk-qxvault.luks -> 184);
    # names that do not match are skipped.
    [[ "${luks_file##*/}" =~ ^vm-([0-9]+)-disk-qxvault\.luks$ ]] || continue
    vmid="${BASH_REMATCH[1]}"

    # Check if VM still exists in Proxmox
    if qm status "$vmid" >/dev/null 2>&1; then
      continue
    fi

    orphan_count=$((orphan_count + 1))

    # Close LUKS mapper if still open
    mapper="${LUKS_MAPPER_TEMPLATE//\{vmid\}/$vmid}"
    if cryptsetup status "$mapper" >/dev/null 2>&1; then
      if [ "$dry_run" = true ]; then
        log "  [dry-run] Would close mapper: $mapper"
      else
        log "  Closing orphaned mapper: $mapper"
        # Keep the image when the mapper cannot be closed rather than
        # deleting the backing file of a device that is still open.
        if ! cryptsetup close "$mapper" 2>/dev/null; then
          log "  WARNING: could not close mapper $mapper; leaving $luks_file in place"
          continue
        fi
      fi
    fi

    if [ "$dry_run" = true ]; then
      log "  [dry-run] Would remove: $luks_file (VMID $vmid no longer exists)"
    else
      log "  Removing orphaned LUKS image: $luks_file (VMID $vmid no longer exists)"
      rm -f -- "$luks_file"
      # Remove the VM's image directory if empty (rmdir fails harmlessly
      # when other files remain).
      vmdir=$(dirname -- "$luks_file")
      rmdir -- "$vmdir" 2>/dev/null || true
      cleaned_count=$((cleaned_count + 1))
    fi
  done

  if [ "$orphan_count" -eq 0 ]; then
    log "No orphaned LUKS images found."
  elif [ "$dry_run" = true ]; then
    log "Found $orphan_count orphaned LUKS image(s). Run without --dry-run to remove."
  else
    log "Cleaned up $cleaned_count orphaned LUKS image(s)."
  fi
}

# ── Dispatch ───────────────────────────────────────────────────────
# The first positional argument selects the subcommand; whatever follows
# is forwarded to the handler unchanged.
COMMAND="${1:-}"
if [ "$#" -gt 0 ]; then
  shift
fi

case "$COMMAND" in
  "" | help | -h | --help)
    usage
    ;;
  status)
    cmd_status "$@"
    ;;
  verify)
    cmd_verify "$@"
    ;;
  revert)
    cmd_revert "$@"
    ;;
  cleanup)
    cmd_cleanup "$@"
    ;;
  *)
    die "Unknown command: $COMMAND. Run 'proxmox-kms-bridge help' for usage."
    ;;
esac
