Automate Your Backups with MinIO: Free S3-Compatible Storage for Everything

Paying AWS for backup storage? Running out of disk on your server? There’s a better way.

MinIO gives you S3-compatible object storage on your own hardware — for free. Combined with a few Python scripts, you get fully automated backups for databases, files, and application data.

Why MinIO for Backups?

  • Free — no per-GB charges, no request fees
  • S3-compatible — use boto3, aws-cli, or any S3 tool
  • Fast local access — LAN speeds, not internet latency
  • Erasure coding — data survives disk failures
  • Encryption — AES-256 at rest, TLS in transit

Set Up MinIO in 30 Seconds

docker run -d --name minio-backup \
  -p 9000:9000 -p 9001:9001 \
  -v /mnt/backup-drive:/data \
  -e MINIO_ROOT_USER=backup_admin \
  -e MINIO_ROOT_PASSWORD=your_secure_password_here \
  minio/minio server /data --console-address ":9001"

Point -v to your backup drive — an external SSD, NAS mount, or second disk.
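
Once the container is up, a quick sanity check is to list buckets over the S3 API. A minimal sketch with boto3, assuming the credentials from the docker run command above:

import boto3
from botocore.config import Config

# Connectivity check: uses the root credentials from the docker run command
# above; in practice you'd create a dedicated backup user instead.
s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:9000",
    aws_access_key_id="backup_admin",
    aws_secret_access_key="your_secure_password_here",
    config=Config(signature_version="s3v4"),
)

# If this prints without raising, the server is reachable and the credentials work.
print([b["Name"] for b in s3.list_buckets()["Buckets"]])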

The Universal Backup Script

This script handles any directory backup with compression, rotation, and verification:

#!/usr/bin/env python3
"""Universal backup script for MinIO."""

import boto3
import tarfile
import hashlib
import os
import sys
from datetime import datetime
from botocore.config import Config
from botocore.exceptions import ClientError

class MinIOBackup:
    def __init__(self, endpoint="http://localhost:9000", access_key="backup_admin",
                 secret_key="your_secure_password_here"):
        # In production, load these credentials from environment variables
        # rather than hardcoding them.
        self.s3 = boto3.client(
            "s3",
            endpoint_url=endpoint,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            config=Config(signature_version="s3v4"),
        )

    def ensure_bucket(self, bucket: str):
        """Create bucket if it doesn't exist."""
        try:
            self.s3.head_bucket(Bucket=bucket)
        except ClientError:
            self.s3.create_bucket(Bucket=bucket)
            print(f"Created bucket: {bucket}")

    def backup_directory(self, source_dir: str, bucket: str, keep_last: int = 7):
        """Compress and upload a directory."""
        self.ensure_bucket(bucket)

        dir_name = os.path.basename(source_dir.rstrip("/"))
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive = f"/tmp/{dir_name}_{timestamp}.tar.gz"

        # Compress
        print(f"Compressing {source_dir}...")
        with tarfile.open(archive, "w:gz") as tar:
            tar.add(source_dir, arcname=dir_name)

        size_mb = os.path.getsize(archive) / 1024 / 1024

        # Calculate checksum
        md5 = hashlib.md5()
        with open(archive, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                md5.update(chunk)
        checksum = md5.hexdigest()

        # Upload
        s3_key = f"{dir_name}/{os.path.basename(archive)}"
        print(f"Uploading {size_mb:.1f} MB to {bucket}/{s3_key}...")
        self.s3.upload_file(archive, bucket, s3_key, ExtraArgs={
            "Metadata": {"checksum-md5": checksum, "source": source_dir}
        })

        # Verify
        response = self.s3.head_object(Bucket=bucket, Key=s3_key)
        remote_size = response["ContentLength"] / 1024 / 1024
        print(f"Verified: {remote_size:.1f} MB uploaded (md5: {checksum[:8]}...)")

        # Cleanup local
        os.remove(archive)

        # Rotate old backups
        self._rotate(bucket, f"{dir_name}/", keep_last)

        return s3_key

    def backup_postgres(self, db_name: str, bucket: str, keep_last: int = 14):
        """Dump and upload a PostgreSQL database."""
        import subprocess
        self.ensure_bucket(bucket)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        dump_file = f"/tmp/{db_name}_{timestamp}.sql.gz"

        print(f"Dumping database {db_name}...")
        # pipefail ensures a pg_dump failure is reported, not just gzip's exit code
        result = subprocess.run(
            f"set -o pipefail; pg_dump {db_name} | gzip > {dump_file}",
            shell=True, executable="/bin/bash", capture_output=True, text=True
        )
        if result.returncode != 0:
            print(f"pg_dump failed: {result.stderr}")
            return None

        size_mb = os.path.getsize(dump_file) / 1024 / 1024
        s3_key = f"postgres/{db_name}/{os.path.basename(dump_file)}"

        print(f"Uploading {size_mb:.1f} MB to {bucket}/{s3_key}...")
        self.s3.upload_file(dump_file, bucket, s3_key)
        os.remove(dump_file)

        self._rotate(bucket, f"postgres/{db_name}/", keep_last)
        print(f"Database backup complete: {s3_key}")
        return s3_key

    def backup_mysql(self, db_name: str, bucket: str, user="root",
                     password="", keep_last: int = 14):
        """Dump and upload a MySQL database."""
        import subprocess
        self.ensure_bucket(bucket)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        dump_file = f"/tmp/{db_name}_{timestamp}.sql.gz"

        cmd = f"mysqldump -u{user}"
        if password:
            cmd += f" -p{password}"  # note: visible in the process list; prefer ~/.my.cnf
        cmd += f" {db_name} | gzip > {dump_file}"

        # pipefail ensures a mysqldump failure is reported, not just gzip's exit code
        result = subprocess.run(f"set -o pipefail; {cmd}", shell=True,
                                executable="/bin/bash", capture_output=True, text=True)
        if result.returncode != 0:
            print(f"mysqldump failed: {result.stderr}")
            return None

        size_mb = os.path.getsize(dump_file) / 1024 / 1024
        s3_key = f"mysql/{db_name}/{os.path.basename(dump_file)}"

        self.s3.upload_file(dump_file, bucket, s3_key)
        os.remove(dump_file)
        self._rotate(bucket, f"mysql/{db_name}/", keep_last)
        print(f"MySQL backup complete: {s3_key} ({size_mb:.1f} MB)")
        return s3_key

    def list_backups(self, bucket: str, prefix: str = ""):
        """List all backups."""
        response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        for obj in response.get("Contents", []):
            size_mb = obj["Size"] / 1024 / 1024
            print(f"  {obj['Key']:50s} | {size_mb:>8.1f} MB | {obj['LastModified']}")

    def restore_latest(self, bucket: str, prefix: str, dest_dir: str):
        """Download and extract the latest backup."""
        response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        objects = sorted(response.get("Contents", []), key=lambda x: x["LastModified"])

        if not objects:
            print(f"No backups found with prefix: {prefix}")
            return None

        latest = objects[-1]
        local_file = f"/tmp/{os.path.basename(latest['Key'])}"

        print(f"Downloading {latest['Key']}...")
        self.s3.download_file(bucket, latest["Key"], local_file)

        print(f"Extracting to {dest_dir}...")
        os.makedirs(dest_dir, exist_ok=True)
        with tarfile.open(local_file, "r:gz") as tar:
            tar.extractall(dest_dir)

        os.remove(local_file)
        print(f"Restored to {dest_dir}")
        return dest_dir

    def _rotate(self, bucket: str, prefix: str, keep: int):
        """Delete old backups, keeping only the last N."""
        response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        objects = sorted(response.get("Contents", []), key=lambda x: x["LastModified"])

        if len(objects) > keep:
            to_delete = objects[:-keep]
            for obj in to_delete:
                self.s3.delete_object(Bucket=bucket, Key=obj["Key"])
                print(f"  Rotated: {obj['Key']}")

# Usage
if __name__ == "__main__":
    backup = MinIOBackup()

    # Backup a project directory
    backup.backup_directory("/home/user/myproject", "backups")

    # Backup PostgreSQL
    backup.backup_postgres("myapp_production", "backups")

    # List all backups
    backup.list_backups("backups")

    # Restore latest
    # backup.restore_latest("backups", "myproject/", "/tmp/restore")
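
The same dump-compress-upload pattern covers MongoDB too. Here is a hedged sketch of an extra method you could add to MinIOBackup alongside backup_postgres and backup_mysql; it assumes mongodump is installed and the database is reachable locally without authentication:

    def backup_mongodb(self, db_name: str, bucket: str, keep_last: int = 14):
        """Dump and upload a MongoDB database (sketch; assumes local mongodump)."""
        import subprocess
        self.ensure_bucket(bucket)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        dump_file = f"/tmp/{db_name}_{timestamp}.archive.gz"

        # --archive writes a single file, --gzip compresses it in-process
        result = subprocess.run(
            ["mongodump", "--db", db_name, f"--archive={dump_file}", "--gzip"],
            capture_output=True, text=True
        )
        if result.returncode != 0:
            print(f"mongodump failed: {result.stderr}")
            return None

        s3_key = f"mongodb/{db_name}/{os.path.basename(dump_file)}"
        self.s3.upload_file(dump_file, bucket, s3_key)
        os.remove(dump_file)
        self._rotate(bucket, f"mongodb/{db_name}/", keep_last)
        print(f"MongoDB backup complete: {s3_key}")
        return s3_key

Restoring is the mirror image: download the archive and feed it to mongorestore with --archive and --gzip.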

Cron Setup: Automatic Daily Backups

# Edit crontab
crontab -e

# Add these lines:

# Database backup at 1:00 AM
0 1 * * * /usr/bin/python3 /opt/scripts/backup.py db >> /var/log/backup.log 2>&1

# Project files at 2:00 AM
0 2 * * * /usr/bin/python3 /opt/scripts/backup.py files >> /var/log/backup.log 2>&1

# Config files at 3:00 AM
0 3 * * * /usr/bin/python3 /opt/scripts/backup.py config >> /var/log/backup.log 2>&1
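
The cron entries pass a mode argument (db, files, config), so the script's __main__ block needs to dispatch on sys.argv. A minimal sketch (the paths and database name here are placeholders, not part of the original script):

if __name__ == "__main__":
    backup = MinIOBackup()
    mode = sys.argv[1] if len(sys.argv) > 1 else "files"

    if mode == "db":
        backup.backup_postgres("myapp_production", "backups")
    elif mode == "files":
        backup.backup_directory("/home/user/myproject", "backups")
    elif mode == "config":
        backup.backup_directory("/etc", "backups")  # placeholder config directory
    else:
        print(f"Unknown mode: {mode}")
        sys.exit(1)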

Monitoring Backup Health

from datetime import datetime, timedelta

def check_backup_health(backup: MinIOBackup, bucket: str, prefix: str,
                        max_age_hours: int = 25):
    """Alert if backup is too old or too small."""
    response = backup.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
    objects = sorted(response.get("Contents", []), key=lambda x: x["LastModified"])

    if not objects:
        return {"status": "CRITICAL", "message": f"No backups found for {prefix}"}

    latest = objects[-1]
    age = datetime.now(latest["LastModified"].tzinfo) - latest["LastModified"]
    size_mb = latest["Size"] / 1024 / 1024

    if age > timedelta(hours=max_age_hours):
        return {"status": "WARNING", "message": f"Latest backup is {age.total_seconds()/3600:.1f}h old"}

    if size_mb < 0.001:
        return {"status": "CRITICAL", "message": f"Latest backup is suspiciously small: {size_mb:.4f} MB"}

    return {
        "status": "OK",
        "message": f"Latest: {latest['Key']} ({size_mb:.1f} MB, {age.total_seconds()/3600:.1f}h ago)"
    }

# Check all backup sets
for prefix in ["myproject/", "postgres/myapp/", "configs/"]:
    result = check_backup_health(MinIOBackup(), "backups", prefix)
    print(f"[{result['status']:8s}] {result['message']}")
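
To run the check from cron as well, a small wrapper that exits nonzero on any non-OK status lets cron's MAILTO (or whatever monitoring you use) pick up failures. A sketch reusing the same bucket and example prefixes:

import sys

def main():
    client = MinIOBackup()
    failed = False
    for prefix in ["myproject/", "postgres/myapp/", "configs/"]:
        result = check_backup_health(client, "backups", prefix)
        print(f"[{result['status']:8s}] {result['message']}")
        if result["status"] != "OK":
            failed = True
    # A nonzero exit code makes the failure visible to cron/monitoring
    sys.exit(1 if failed else 0)

if __name__ == "__main__":
    main()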

Multi-Server Backup Architecture

┌──────────┐     ┌──────────┐     ┌──────────┐
│ Server 1 │     │ Server 2 │     │ Server 3 │
│ (app)    │     │ (db)     │     │ (files)  │
└────┬─────┘     └────┬─────┘     └────┬─────┘
     │                │                │
     └────────┬───────┴────────┬───────┘
              │                │
         ┌────▼────┐     ┌─────▼─────┐
         │ MinIO   │     │ MinIO     │
         │ Primary │────▶│ Replica   │
         │ (SSD)   │     │ (offsite) │
         └─────────┘     └───────────┘

MinIO supports bucket and site replication between deployments; once configured, your primary backup server syncs to the offsite replica automatically.
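
Built-in replication is configured on the MinIO side (for example with the mc client). As a rough illustration of the same idea in the style of the scripts above, here is a hedged sketch that copies any objects missing from a replica; the replica endpoint and credentials are placeholders:

# One-way catch-up sync from the primary to an offsite replica. This is a
# simple illustration, not MinIO's built-in replication; the replica endpoint
# and credentials below are placeholders.
primary = MinIOBackup()
replica = MinIOBackup(endpoint="http://offsite-host:9000",
                      access_key="replica_admin", secret_key="replica_password")

def sync_bucket(bucket: str):
    replica.ensure_bucket(bucket)
    # Note: list_objects_v2 returns at most 1000 keys; use a paginator for more.
    have = {o["Key"] for o in replica.s3.list_objects_v2(Bucket=bucket).get("Contents", [])}
    for obj in primary.s3.list_objects_v2(Bucket=bucket).get("Contents", []):
        if obj["Key"] not in have:
            body = primary.s3.get_object(Bucket=bucket, Key=obj["Key"])["Body"]
            replica.s3.upload_fileobj(body, bucket, obj["Key"])
            print(f"Replicated: {obj['Key']}")

sync_bucket("backups")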

Storage Cost Comparison

For 500GB of backup data:

Solution                   | Monthly Cost
---------------------------|-------------------------
AWS S3 Standard            | ~$11.50 + request fees
AWS S3 Glacier             | ~$2.00 + retrieval fees
Google Cloud Storage       | ~$10.00
MinIO (self-hosted)        | $0 (your hardware)
MinIO (cheap VPS + disk)   | ~$5-10 (Hetzner, OVH)

For terabytes of data, MinIO on a dedicated server saves hundreds per month.
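
A rough back-of-the-envelope check, using the same ~$0.023/GB-month S3 Standard rate behind the table above and an assumed ~$50/month dedicated server:

# Rough savings estimate; the server price is an assumption, not a quote.
tb = 10
s3_standard = tb * 1024 * 0.023   # ~$0.023 per GB-month (S3 Standard, first 50 TB)
dedicated = 50                    # assumed monthly price for a box with enough disk
print(f"S3: ~${s3_standard:.0f}/mo vs dedicated: ~${dedicated}/mo "
      f"=> saves ~${s3_standard - dedicated:.0f}/mo before request and egress fees")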

Summary

MinIO + Python + cron = automated backup system that:

  • Costs nothing beyond hardware
  • Uses the same S3 API you already know
  • Compresses, uploads, verifies, and rotates automatically
  • Works with any database (PostgreSQL, MySQL, MongoDB)
  • Scales from a Raspberry Pi to a data center

Stop paying cloud storage fees for backups. Run MinIO.


Need a custom automation or scraping tool? 79+ production actors on Apify Store. Questions or custom work? Email spinov001@gmail.com — more tips at t.me/scraping_ai