kitestacks-homelab/scripts/backup-volumes.sh
kenpat 5b3698191e ops: add nightly Docker volume backup to SAMURAI
- scripts/backup-volumes.sh: tar each named volume via alpine, rsync to
  SAMURAI (Tailscale 100.74.x.x) at 02:00; 7-day retention; preflight
  checks Tailscale + SSH before starting
- scripts/setup-samurai-ssh.sh: one-time SSH key install to SAMURAI
- scripts/monk-backup.{service,timer}: systemd units for nightly schedule
- docs/backup-setup.md: full setup instructions incl. Windows OpenSSH
  config and admin authorized_keys fix

Phase 2 (MinIO S3 on SAMURAI) tracked as TODO in backup-volumes.sh.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-19 02:59:14 -05:00

110 lines
4.8 KiB
Bash
Executable file

#!/usr/bin/env bash
# Nightly Docker volume backup: monk → SAMURAI (Tailscale)
# Phase 1: rsync tar archives over SSH
# Phase 2 (TODO): push to MinIO S3 on SAMURAI when deployed
#
# First-time setup:
# 1. Run scripts/setup-samurai-ssh.sh to install the SSH key on SAMURAI
# 2. Set SAMURAI_USER to your Windows username (default: kenpat)
# 3. On SAMURAI, create the backup dir (default: C:\KiteBackups\monk)
# and make sure rsync is available (install from git-for-windows or cwRsync)
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ── config ────────────────────────────────────────────────────────────────────
# Tailscale address of SAMURAI, the machine that receives the backups.
SAMURAI_IP='100.74.0.109'
# Remote login name: a SAMURAI_USER already present in the environment wins,
# otherwise "kenpat" is used.
: "${SAMURAI_USER:=kenpat}"
# Private key handed to every ssh/rsync invocation that talks to SAMURAI.
SAMURAI_KEY="${HOME}/.ssh/id_ed25519_samurai"
# rsync-over-SSH destination. The Windows backup directory is addressed with a
# Cygwin-style path here; NOTE(review): depending on the rsync build installed
# on SAMURAI this may need to be /mnt/c/KiteBackups/monk instead — confirm.
SAMURAI_DEST="${SAMURAI_USER}@${SAMURAI_IP}:/cygdrive/c/KiteBackups/monk"

# Local staging area for the archives of one run.
BACKUP_TMP='/tmp/monk-volume-backups'

# Where the run log is appended.
LOG_DIR='/var/log/kitestacks'
LOG_FILE="${LOG_DIR}/backup-volumes.log"

# Age threshold (in days) handed to the remote prune step.
RETAIN_DAYS=7

# Named volumes to back up (anonymous hash-named volumes are deliberately
# not listed).
VOLUMES=(
  'kite-ai_open-webui'
  'osticket_osticket_db'
  'osticket_osticket_uploads'
  'portainer_data'
  'prometheus_prometheus-data'
  'uptime-kuma_uptime-kuma'
)
# ── logging ───────────────────────────────────────────────────────────────────
# Create the log directory if needed, then route both stdout and stderr of the
# rest of the script through tee so all output is also appended to the log
# file.
mkdir -p "${LOG_DIR}"
exec &> >(tee -a "${LOG_FILE}")
# Print one log line: "[YYYY-MM-DD HH:MM:SS] " followed by all arguments
# joined into a single string.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
# ── preflight ─────────────────────────────────────────────────────────────────
# Check 1: SAMURAI's address must appear in the output of `tailscale status`.
# The output is captured first and searched afterwards. Piping straight into
# `grep -q` under `set -o pipefail` can report a false failure: grep exits on
# the first match, and a writer that is still printing is then killed by
# SIGPIPE, which makes the whole pipeline non-zero.
# -F matches the IP literally (its dots are not regex wildcards) and -w rejects
# matches that are only part of a longer token.
# A non-zero exit from `tailscale status` itself is also treated as "not
# visible".
if ! ts_status="$(tailscale status 2>/dev/null)" \
  || ! grep -qwF -- "${SAMURAI_IP}" <<<"${ts_status}"; then
  log "ERROR: SAMURAI (${SAMURAI_IP}) not visible on Tailscale — aborting"
  exit 1
fi

# Check 2: a non-interactive SSH login must succeed. BatchMode=yes makes ssh
# fail instead of prompting. ssh's stderr is captured (stdout is discarded) so
# the actual reason can be written to the log next to the generic hint.
if ! ssh_err="$(ssh -i "${SAMURAI_KEY}" -o ConnectTimeout=10 -o BatchMode=yes \
  "${SAMURAI_USER}@${SAMURAI_IP}" true 2>&1 >/dev/null)"; then
  log "ERROR: SSH to SAMURAI failed — check key setup (run scripts/setup-samurai-ssh.sh)"
  if [[ -n "${ssh_err}" ]]; then
    log "  ssh said: ${ssh_err}"
  fi
  exit 1
fi
# ── backup ────────────────────────────────────────────────────────────────────
# Each run stages its archives in its own timestamped directory under
# BACKUP_TMP.
TIMESTAMP=$(date '+%Y%m%d-%H%M%S')
WORK_DIR="${BACKUP_TMP}/${TIMESTAMP}"
mkdir -p "${WORK_DIR}"
# Remove the staging directory on every exit path, so that an abort part-way
# through (set -e) does not leave volume archives behind in /tmp. The :? guard
# refuses to run rm -rf with an empty path.
trap 'rm -rf -- "${WORK_DIR:?}"' EXIT

log "Starting backup run ${TIMESTAMP}"

# Counters for the final summary; volumes that are skipped count as neither.
SUCCESS=0
FAIL=0

for vol in "${VOLUMES[@]}"; do
  # Volumes that `docker volume inspect` cannot find are skipped, not failed.
  if ! docker volume inspect "${vol}" &>/dev/null; then
    log "SKIP: volume '${vol}' not found"
    continue
  fi

  ARCHIVE="${WORK_DIR}/${vol}.tar.gz"
  log "Archiving ${vol} ..."

  # Mount the volume read-only into a throwaway alpine container and stream a
  # gzip-compressed tar of its contents to stdout, which is redirected into
  # the local archive file.
  if docker run --rm \
    -v "${vol}:/source:ro" \
    alpine \
    tar czf - -C /source . >"${ARCHIVE}"; then
    SIZE=$(du -sh "${ARCHIVE}" | cut -f1)
    log " OK: ${vol} → ${ARCHIVE} (${SIZE})"
    # Plain assignment instead of (( SUCCESS++ )): the post-increment form
    # returns status 1 when the old value is 0, which needs an `|| true` guard
    # under set -e.
    SUCCESS=$(( SUCCESS + 1 ))
  else
    log " FAIL: could not archive ${vol}"
    # Drop the partial file so it is not shipped as if it were a good backup.
    rm -f -- "${ARCHIVE}"
    FAIL=$(( FAIL + 1 ))
  fi
done
# ── rsync to SAMURAI ──────────────────────────────────────────────────────────
# SYNC_OK records whether this run delivered a new backup; pruning below
# depends on it.
SYNC_OK=0
if (( SUCCESS == 0 )); then
  # Nothing was archived: do not create an empty, misleading backup directory
  # on SAMURAI.
  log "No archives were created — skipping rsync"
else
  log "Syncing archives to SAMURAI ..."
  # BatchMode=yes keeps the transport non-interactive, matching the other ssh
  # calls in this script.
  if rsync -az --progress \
    -e "ssh -i ${SAMURAI_KEY} -o BatchMode=yes -o StrictHostKeyChecking=accept-new" \
    "${WORK_DIR}/" \
    "${SAMURAI_DEST}/${TIMESTAMP}/"; then
    log "rsync complete → ${SAMURAI_DEST}/${TIMESTAMP}/"
    SYNC_OK=1
  else
    log "ERROR: rsync to SAMURAI failed"
    FAIL=$(( FAIL + 1 ))
  fi
fi

# ── cleanup local tmp ─────────────────────────────────────────────────────────
# The :? guard refuses to run rm -rf with an empty path.
rm -rf -- "${WORK_DIR:?}"

# ── prune old backups on SAMURAI ──────────────────────────────────────────────
# Old backups are pruned only when this run delivered a new one. Otherwise a
# string of failed runs would age out every remaining good backup.
if (( SYNC_OK )); then
  log "Pruning backups older than ${RETAIN_DAYS} days on SAMURAI ..."
  # Remote directory = the part of SAMURAI_DEST after "user@host:".
  REMOTE_DIR="${SAMURAI_DEST#*:}"
  # -mindepth 1 keeps find from ever matching the backup root itself. With only
  # -maxdepth 1 the root is a candidate too, and a stale root mtime would let
  # `rm -rf` delete every backup at once.
  # -mtime +N counts whole 24-hour periods, so a directory is removed once it
  # is more than N full days old.
  # The remote command still ends in `2>/dev/null; true` (best effort), so with
  # a POSIX remote shell "Prune failed" is only reached if ssh itself fails.
  if ssh -i "${SAMURAI_KEY}" -o BatchMode=yes \
    "${SAMURAI_USER}@${SAMURAI_IP}" \
    "find ${REMOTE_DIR} -mindepth 1 -maxdepth 1 -type d -mtime +${RETAIN_DAYS} -exec rm -rf {} + 2>/dev/null; true"; then
    log "Prune complete"
  else
    log "Prune failed (non-fatal)"
  fi
else
  log "Skipping prune: no new backup was delivered this run, keeping existing ones"
fi
# ── summary ───────────────────────────────────────────────────────────────────
# Report the counters, then let the final test decide the script's exit
# status: zero only when nothing failed.
log "Backup run ${TIMESTAMP} complete: ${SUCCESS} OK, ${FAIL} failed"
(( FAIL == 0 ))