01. The Day My Server Died
02. Three Layers of Backup
#!/bin/bash
# backup.sh — Daily backup script.
# Layer 1: per-volume tar snapshots kept locally for 7 days.
# Layer 2: off-site restic backup of the snapshots plus /opt/docker.
set -euo pipefail

readonly BACKUP_DIR="/opt/backups"
readonly RESTIC_REPO="b2:my-bucket:server-backups"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
readonly TIMESTAMP

echo "=== Starting backup at $TIMESTAMP ==="

# Ensure the snapshot directory exists up front: on a fresh host `find` below
# would fail on the missing directory and abort the whole run under `set -e`.
mkdir -p "$BACKUP_DIR/volumes"

# Layer 1: Volume snapshots
echo "Creating volume snapshots..."
# Read names line-by-line rather than word-splitting $(docker volume ls -q).
while IFS= read -r volume; do
  docker run --rm -v "$volume:/source:ro" -v "$BACKUP_DIR/volumes:/backup" alpine tar czf "/backup/${volume}-${TIMESTAMP}.tar.gz" -C /source .
done < <(docker volume ls -q)

# Cleanup old snapshots (keep 7 days)
find "$BACKUP_DIR/volumes" -name "*.tar.gz" -mtime +7 -delete

# Layer 2: Off-site with restic
echo "Running restic backup..."
restic -r "$RESTIC_REPO" backup "$BACKUP_DIR/volumes" /opt/docker --exclude="*.log" --tag "daily"

# Prune old backups
restic -r "$RESTIC_REPO" forget --keep-daily 30 --keep-monthly 12 --keep-yearly 2 --prune

echo "=== Backup completed ==="

restic vs rclone: Use restic for backup (it handles deduplication, encryption, and retention policies). Use rclone for syncing (mirroring a folder to cloud storage). They solve different problems. I use restic for backups and rclone to sync my Paperless export folder to Google Drive as an additional copy.
03. Testing Your Restores
#!/bin/bash
# restore-test.sh — Monthly restore verification.
# Checks repo integrity, restores the latest snapshot, verifies critical
# files, rebuilds Docker volumes from the tar snapshots, and boots the stack.
set -euo pipefail

readonly RESTIC_REPO="b2:my-bucket:server-backups"
readonly RESTORE_DIR="/opt/restore-test"

# Strip the "-YYYYMMDD-HHMMSS.tar.gz" suffix from an archive path to recover
# the Docker volume name. The previous `sed 's/-[0-9].*//g'` truncated at the
# first "-<digit>", corrupting names like "app-2-data"; this matches only the
# exact timestamp format produced by backup.sh (date +%Y%m%d-%H%M%S).
volume_name_from_archive() {
  local base=${1##*/}
  printf '%s\n' "${base%-????????-??????.tar.gz}"
}

echo "=== Restore Test Started ==="

# Verify backup integrity.
# NOTE: under `set -e` the old `restic ...; if [ $? -ne 0 ]` pattern was dead
# code — the script exited before the check ran. Test the command directly.
echo "Checking backup integrity..."
if ! restic -r "$RESTIC_REPO" check --read-data-subset=10%; then
  echo "CRITICAL: Backup integrity check failed!"
  exit 1
fi

# Restore latest snapshot
echo "Restoring latest snapshot..."
mkdir -p "$RESTORE_DIR"
restic -r "$RESTIC_REPO" restore latest --target "$RESTORE_DIR"

# Verify critical files exist
echo "Verifying critical files..."
CRITICAL_FILES=(
  "opt/docker/docker-compose.yml"
  "opt/backups/volumes"
)
for f in "${CRITICAL_FILES[@]}"; do
  if [[ ! -e "$RESTORE_DIR/$f" ]]; then
    echo "CRITICAL: Missing file: $f"
    exit 1
  fi
done

# Restore volumes and start services
echo "Restoring Docker volumes..."
cd "$RESTORE_DIR/opt/docker"
for archive in "$RESTORE_DIR/opt/backups/volumes/"*.tar.gz; do
  # Guard the unmatched-glob case: without this, the literal pattern would be
  # passed to `docker volume create` when no snapshots were restored.
  [[ -e "$archive" ]] || continue
  VOLUME_NAME=$(volume_name_from_archive "$archive")
  docker volume create "$VOLUME_NAME"
  docker run --rm -v "$VOLUME_NAME:/target" -v "$(dirname "$archive"):/backup:ro" alpine tar xzf "/backup/$(basename "$archive")" -C /target
done

echo "Starting services..."
docker compose up -d

# Wait and check health
sleep 30
UNHEALTHY=$(docker compose ps --format json | grep -c '"unhealthy"' || true)
if [ "$UNHEALTHY" -gt 0 ]; then
  echo "WARNING: $UNHEALTHY unhealthy services"
  docker compose ps
else
  echo "All services healthy!"
fi

echo "=== Restore Test Complete ==="

Never skip restore testing because 'backups are running fine.' A backup that can't be restored is worthless. I've seen corrupted databases, missing encryption keys, and outdated restore scripts. The monthly test is the only way to be confident your recovery plan actually works.
04. Writing a Recovery Runbook
# recovery-template.env
# Fill in these values during recovery
# Sources: Bitwarden vault "Server Secrets" entry

# Database passwords
DB_PASSWORD=
REDIS_PASSWORD=

# Application secrets
AUTHENTIK_SECRET_KEY=
N8N_ENCRYPTION_KEY=
PAPERLESS_SECRET_KEY=

# Backup credentials
RESTIC_PASSWORD=
B2_ACCOUNT_ID=
B2_ACCOUNT_KEY=

# Domain and networking
DOMAIN=example.com
TAILSCALE_AUTH_KEY=

# Email (for notifications and password recovery)
SMTP_HOST=
SMTP_USER=
SMTP_PASSWORD=