Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions optscale-deploy/backup_module/backup/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Stage 1: Builder — fetches the static CLI tools so the final image stays small.
FROM debian:bookworm-slim AS builder

RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    unzip \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install AWS CLI v2 (installs to /usr/local/aws-cli by default).
# -f (--fail) makes curl fail the build on an HTTP error instead of saving
# the error page as "awscliv2.zip" and breaking the next step confusingly.
RUN curl -f "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    ./aws/install && \
    rm -rf awscliv2.zip aws

# Download and extract MongoDB tools (pinned version, debian12/x86_64 build).
RUN curl -fLO https://fastdl.mongodb.org/tools/db/mongodb-database-tools-debian12-x86_64-100.10.0.tgz && \
    tar -xzf mongodb-database-tools-debian12-x86_64-100.10.0.tgz && \
    mv mongodb-database-tools-*/bin/* /usr/local/bin/ && \
    rm -rf mongodb-database-tools-*

# Install ClickHouse client (static binary).
# NOTE(review): "latest" is not reproducible and assumes every release ships a
# `clickhouse-client` asset — consider pinning a version. With -f, a missing
# asset fails the build here instead of producing a non-executable "binary".
RUN curl -fLO https://github.com/ClickHouse/ClickHouse/releases/latest/download/clickhouse-client && \
    chmod +x clickhouse-client && \
    mv clickhouse-client /usr/local/bin/clickhouse-client


# Stage 2: Final lightweight image — runtime clients only, no build tooling.
FROM debian:bookworm-slim

RUN apt-get update && apt-get install -y --no-install-recommends \
    default-mysql-client \
    redis-tools \
    curl \
    ca-certificates \
    bash \
    groff \
    less \
    libgssapi-krb5-2 \
    && rm -rf /var/lib/apt/lists/*

# Copy AWS CLI v2 (full install dir + symlink)
COPY --from=builder /usr/local/aws-cli /usr/local/aws-cli
RUN ln -s /usr/local/aws-cli/v2/current/bin/aws /usr/local/bin/aws

# Copy MongoDB tools
COPY --from=builder /usr/local/bin/mongodump \
    /usr/local/bin/mongorestore \
    /usr/local/bin/mongoexport \
    /usr/local/bin/mongoimport \
    /usr/local/bin/mongostat \
    /usr/local/bin/mongotop \
    /usr/local/bin/bsondump \
    /usr/local/bin/mongofiles \
    /usr/local/bin/

# Copy ClickHouse client
COPY --from=builder /usr/local/bin/clickhouse-client /usr/local/bin/clickhouse-client

# Copy backup script
COPY backup.sh /usr/local/bin/backup.sh
RUN chmod +x /usr/local/bin/backup.sh

ENTRYPOINT ["/usr/local/bin/backup.sh"]
219 changes: 219 additions & 0 deletions optscale-deploy/backup_module/backup/backup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
#!/bin/bash
# Dump MariaDB, MongoDB, ClickHouse and RabbitMQ definitions, then upload the
# whole backup directory to S3. Individual backends are best-effort (a failure
# is recorded in a *_STATUS variable and the script moves on); only a failed
# S3 upload makes the job exit non-zero.
#
# Deliberately NOT `set -e`: every backup step handles its own failure so one
# broken backend does not prevent the others from being backed up.
set -uo pipefail

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/tmp/backups/$TIMESTAMP"
CLICKHOUSE_DATA_DIR="$BACKUP_DIR/clickhouse_data"
mkdir -p "$BACKUP_DIR" "$CLICKHOUSE_DATA_DIR"

# Track what succeeded/failed for summary
MARIADB_STATUS="skipped"
MONGO_STATUS="skipped"
CLICKHOUSE_STATUS="skipped"
RABBITMQ_STATUS="skipped"
UPLOAD_STATUS="skipped"

echo "--- Starting Full Database Dump $TIMESTAMP ---"

# Required env vars:
# MARIADB_ROOT_PASSWORD, MONGO_ROOT_PASSWORD, CLICKHOUSE_PASSWORD, RABBITMQ_PASSWORD
# S3_BUCKET, S3_PREFIX
# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION
#
# Warn up front about anything missing so a misconfigured CronJob is obvious
# at the top of the log; under `set -u` an unset variable would otherwise
# abort the script mid-run with a far less helpful error.
for _required in MARIADB_ROOT_PASSWORD MONGO_ROOT_PASSWORD CLICKHOUSE_PASSWORD \
                 RABBITMQ_PASSWORD S3_BUCKET S3_PREFIX AWS_ACCESS_KEY_ID \
                 AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION; do
  if [ -z "${!_required:-}" ]; then
    echo "WARNING: required environment variable $_required is not set" >&2
  fi
done

# ─────────────────────────────────────────────
# 1. MariaDB (schema + data)
# ─────────────────────────────────────────────
echo ""
echo "[1/5] Backing up MariaDB..."
# Pass the password via the MYSQL_PWD environment variable instead of argv so
# it is not visible to other processes in `ps` for the lifetime of the dump.
# --single-transaction gives a consistent snapshot without locking InnoDB tables.
if MYSQL_PWD="$MARIADB_ROOT_PASSWORD" mysqldump \
    -h mariadb \
    -u root \
    --all-databases \
    --single-transaction \
    --routines \
    --triggers \
    --events \
    > "$BACKUP_DIR/mariadb.sql" 2>/tmp/mariadb_err.txt; then
  echo "MariaDB backup complete."
  MARIADB_STATUS="ok"
else
  echo "ERROR: MariaDB backup failed. Skipping."
  cat /tmp/mariadb_err.txt || true
  # Drop the partial/empty dump so a broken file is never uploaded.
  rm -f "$BACKUP_DIR/mariadb.sql"
  MARIADB_STATUS="FAILED"
fi

# ─────────────────────────────────────────────
# 2. MongoDB (data only, no auth/user metadata)
# ─────────────────────────────────────────────
echo ""
echo "[2/5] Backing up MongoDB..."
# Dump the restapi database into a single archive file; connection details
# target the in-cluster mongo-discovery service with the admin auth database.
mongo_args=(
  --host mongo-discovery
  --username root
  --password "$MONGO_ROOT_PASSWORD"
  --authenticationDatabase admin
  --db restapi
  --archive="$BACKUP_DIR/mongo.archive"
)
if mongodump "${mongo_args[@]}" 2>/tmp/mongo_err.txt; then
  echo "MongoDB backup complete."
  MONGO_STATUS="ok"
else
  echo "ERROR: MongoDB backup failed. Skipping."
  cat /tmp/mongo_err.txt || true
  rm -f "$BACKUP_DIR/mongo.archive"
  MONGO_STATUS="FAILED"
fi

# ─────────────────────────────────────────────
# 3. ClickHouse (schemas + table data)
# ─────────────────────────────────────────────
echo ""
echo "[3/5] Backing up ClickHouse schemas and data..."

CH_FAIL=0

# Run one query against the in-cluster ClickHouse with the shared connection
# settings; stderr goes to /tmp/ch_err.txt for the caller to report.
ch_query() {
  clickhouse-client \
    --host clickhouse \
    --user default \
    --password "$CLICKHOUSE_PASSWORD" \
    --query "$1" 2>/tmp/ch_err.txt
}

# BUGFIX: the previous `mapfile -t ARR < <(cmd) || CH_FAIL=1` never detected a
# connection failure — mapfile itself exits 0 even when the process
# substitution fails, so clickhouse-client's status was silently discarded and
# a dead ClickHouse was reported as "ok" with no data. Capture the output
# first and check the query's exit status explicitly.
if CH_DB_LIST=$(ch_query "SHOW DATABASES"); then
  mapfile -t CLICKHOUSE_DATABASES < <(
    printf '%s\n' "$CH_DB_LIST" \
      | grep -Ev '^(system|information_schema|INFORMATION_SCHEMA)$'
  )
else
  CH_FAIL=1
fi

if [ "$CH_FAIL" -eq 1 ]; then
  echo "ERROR: Could not connect to ClickHouse. Skipping."
  cat /tmp/ch_err.txt || true
  CLICKHOUSE_STATUS="FAILED"
else
  CLICKHOUSE_STATUS="ok"
  for db in "${CLICKHOUSE_DATABASES[@]}"; do
    [ -z "$db" ] && continue

    echo "  Backing up ClickHouse database schema: $db"
    if ! ch_query "SHOW CREATE DATABASE \`$db\`" \
        > "$BACKUP_DIR/clickhouse_${db}_schema.sql"; then
      echo "  WARNING: Failed to back up schema for database $db. Skipping database."
      cat /tmp/ch_err.txt || true
      CLICKHOUSE_STATUS="partial"
      continue
    fi

    DB_DIR="$CLICKHOUSE_DATA_DIR/$db"
    mkdir -p "$DB_DIR"

    # Same capture-then-check pattern as above so a failed table listing is
    # actually detected (and now also downgrades the status to "partial",
    # consistent with the other per-object failure paths).
    if ! TABLE_LIST=$(ch_query "SHOW TABLES FROM \`$db\`"); then
      echo "  WARNING: Could not list tables for $db. Skipping."
      cat /tmp/ch_err.txt || true
      CLICKHOUSE_STATUS="partial"
      continue
    fi
    mapfile -t TABLES < <(printf '%s\n' "$TABLE_LIST")

    for table in "${TABLES[@]}"; do
      [ -z "$table" ] && continue

      echo "    Backing up ClickHouse table schema: $db.$table"
      if ! ch_query "SHOW CREATE TABLE \`$db\`.\`$table\`" \
          > "$DB_DIR/${table}.schema.sql"; then
        echo "    WARNING: Failed schema for $db.$table. Skipping table."
        cat /tmp/ch_err.txt || true
        CLICKHOUSE_STATUS="partial"
        continue
      fi

      echo "    Backing up ClickHouse table data: $db.$table"
      # Native format preserves types exactly and is the cheapest to restore.
      if ! ch_query "SELECT * FROM \`$db\`.\`$table\` FORMAT Native" \
          > "$DB_DIR/${table}.native"; then
        echo "    WARNING: Failed data dump for $db.$table. Skipping table data."
        cat /tmp/ch_err.txt || true
        rm -f "$DB_DIR/${table}.native"
        CLICKHOUSE_STATUS="partial"
      fi
    done
  done

  echo "ClickHouse backup complete (status: $CLICKHOUSE_STATUS)."
fi

# ─────────────────────────────────────────────
# 4. RabbitMQ (definitions only)
# ─────────────────────────────────────────────
echo ""
echo "[4/5] Backing up RabbitMQ definitions..."

# Export broker definitions (vhosts, users, queues, bindings, policies) via
# the management API. --max-time bounds the call so a wedged broker cannot
# hang the nightly job forever; on any transport error fall back to "000".
HTTP_STATUS=$(curl -s -o "$BACKUP_DIR/rabbitmq_defs.json" -w "%{http_code}" \
  --max-time 120 \
  -u optscale:"$RABBITMQ_PASSWORD" \
  http://rabbitmq:15672/api/definitions 2>/tmp/rabbitmq_err.txt) || HTTP_STATUS="000"

if [ "$HTTP_STATUS" = "200" ]; then
  echo "RabbitMQ definitions backup complete."
  echo "NOTE: Queue message contents are not included."
  RABBITMQ_STATUS="ok"
else
  echo "ERROR: RabbitMQ backup failed with HTTP $HTTP_STATUS. Skipping."
  cat /tmp/rabbitmq_err.txt || true
  # Remove the (possibly partial) definitions file so it is never uploaded.
  rm -f "$BACKUP_DIR/rabbitmq_defs.json"
  RABBITMQ_STATUS="FAILED (HTTP $HTTP_STATUS)"
fi

# ─────────────────────────────────────────────
# 5. Manifest + Upload to S3
# ─────────────────────────────────────────────
echo ""
echo "[5/5] Creating manifest and uploading to S3..."

# Write a small machine-readable manifest next to the dumps so a restore can
# see at a glance what this snapshot contains and which backends failed.
manifest="$BACKUP_DIR/backup_manifest.txt"
{
  printf 'timestamp=%s\n' "$TIMESTAMP"
  printf 'created_at=%s\n' "$(date -Iseconds)"
  printf 'mariadb=%s\n' "$MARIADB_STATUS"
  printf 'mongodb=%s\n' "$MONGO_STATUS"
  printf 'clickhouse=%s\n' "$CLICKHOUSE_STATUS"
  printf 'rabbitmq=%s\n' "$RABBITMQ_STATUS"
  printf '%s\n' "---files---"
  find "$BACKUP_DIR" -maxdepth 5 -type f | sort
} > "$manifest"

# Upload the whole timestamped directory; a failure here is what decides the
# job's exit status in the summary section below.
if aws s3 cp "$BACKUP_DIR/" "s3://$S3_BUCKET/$S3_PREFIX/$TIMESTAMP/" --recursive 2>/tmp/s3_err.txt; then
  echo "Backup uploaded to s3://$S3_BUCKET/$S3_PREFIX/$TIMESTAMP/"
  UPLOAD_STATUS="ok"
else
  echo "ERROR: S3 upload failed."
  cat /tmp/s3_err.txt || true
  UPLOAD_STATUS="FAILED"
fi

# ─────────────────────────────────────────────
# Summary
# ─────────────────────────────────────────────
# One aligned "name : status" row; 11-char field matches the original layout.
print_row() { printf ' %-11s: %s\n' "$1" "$2"; }

echo ""
echo "======================================"
echo " Backup Summary — $TIMESTAMP"
echo "======================================"
print_row "MariaDB" "$MARIADB_STATUS"
print_row "MongoDB" "$MONGO_STATUS"
print_row "ClickHouse" "$CLICKHOUSE_STATUS"
print_row "RabbitMQ" "$RABBITMQ_STATUS"
print_row "S3 Upload" "$UPLOAD_STATUS"
echo "======================================"

# Only a failed upload is fatal: partial per-backend failures are reported
# above but still leave whatever succeeded safely in S3.
if [ "$UPLOAD_STATUS" = "FAILED" ]; then
  echo "CRITICAL: S3 upload failed. Exiting with error."
  exit 1
fi

echo "Backup job finished."
69 changes: 69 additions & 0 deletions optscale-deploy/backup_module/backup/cronjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: all-db-backup
  namespace: default
spec:
  # Nightly at 02:00; Forbid prevents overlapping runs if one overruns.
  schedule: "0 2 * * *"
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      # NOTE(review): 60s retention makes failed-job logs disappear almost
      # immediately — consider a larger TTL if post-mortem debugging matters.
      ttlSecondsAfterFinished: 60
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: backup-worker
              image: registry-git.lsd.ufcg.edu.br/vtex-lab/mokirana/backup:latest
              # BUGFIX: the backup-storage volume was declared below but never
              # mounted, so dumps landed on the container's overlay filesystem
              # while the 1Ti claim sat allocated and unused. Mount it at
              # /tmp/backups, where backup.sh writes its dumps.
              volumeMounts:
                - name: backup-storage
                  mountPath: /tmp/backups
              env:
                - name: S3_BUCKET
                  value: "mokirana-backups"
                - name: S3_PREFIX
                  value: "full-infrastructure"
                - name: AWS_DEFAULT_REGION
                  value: "us-east-1"
                - name: MARIADB_ROOT_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: MARIADB_ROOT_PASSWORD
                - name: MONGO_ROOT_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: MONGO_ROOT_PASSWORD
                - name: CLICKHOUSE_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: CLICKHOUSE_PASSWORD
                - name: RABBITMQ_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: RABBITMQ_PASSWORD
                - name: AWS_ACCESS_KEY_ID
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: AWS_ACCESS_KEY_ID
                - name: AWS_SECRET_ACCESS_KEY
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: AWS_SECRET_ACCESS_KEY
                - name: AWS_SESSION_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: multi-db-backup-secrets
                      key: AWS_SESSION_TOKEN
          volumes:
            - name: backup-storage
              ephemeral:
                volumeClaimTemplate:
                  spec:
                    accessModes: ["ReadWriteOnce"]
                    storageClassName: "gp3-tagged"
                    resources:
                      requests:
                        # NOTE(review): 1Ti per nightly run is large — confirm
                        # expected dump size before keeping this value.
                        storage: 1Ti
Loading