Automate Your Backups with MinIO: Free S3-Compatible Storage for Everything
Paying AWS for backup storage? Running out of disk on your server? There’s a better way.
MinIO gives you S3-compatible object storage on your own hardware — for free. Combined with a few Python scripts, you get fully automated backups for databases, files, and application data.
Why MinIO for Backups?
- Free — no per-GB charges, no request fees
- S3-compatible — use boto3, aws-cli, or any S3 tool
- Fast local access — LAN speeds, not internet latency
- Erasure coding — data survives disk failures
- Encryption — AES-256 at rest, TLS in transit
Set Up MinIO in 30 Seconds
docker run -d --name minio-backup \
-p 9000:9000 -p 9001:9001 \
-v /mnt/backup-drive:/data \
-e MINIO_ROOT_USER=backup_admin \
-e MINIO_ROOT_PASSWORD=your_secure_password_here \
minio/minio server /data --console-address ":9001"
Point -v to your backup drive — an external SSD, NAS mount, or second disk.
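Once the container is up, a quick boto3 round trip confirms the endpoint and credentials work. A minimal sketch, reusing the values from the docker run command above:

import boto3
from botocore.config import Config

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:9000",
    aws_access_key_id="backup_admin",
    aws_secret_access_key="your_secure_password_here",
    config=Config(signature_version="s3v4"),
)
# A fresh install returns an empty bucket list.
print([b["Name"] for b in s3.list_buckets()["Buckets"]])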
The Universal Backup Script
This script handles any directory backup with compression, rotation, and verification:
#!/usr/bin/env python3
"""Universal backup script for MinIO."""
import boto3
import hashlib
import os
import subprocess
import sys
import tarfile
from datetime import datetime
from botocore.config import Config
from botocore.exceptions import ClientError
class MinIOBackup:
def __init__(self, endpoint="http://localhost:9000", access_key="backup_admin",
secret_key="your_secure_password_here"):
self.s3 = boto3.client(
"s3",
endpoint_url=endpoint,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
config=Config(signature_version="s3v4"),
)
def ensure_bucket(self, bucket: str):
"""Create bucket if it doesn't exist."""
try:
self.s3.head_bucket(Bucket=bucket)
        except ClientError:  # bucket missing (or inaccessible)
self.s3.create_bucket(Bucket=bucket)
print(f"Created bucket: {bucket}")
def backup_directory(self, source_dir: str, bucket: str, keep_last: int = 7):
"""Compress and upload a directory."""
self.ensure_bucket(bucket)
dir_name = os.path.basename(source_dir.rstrip("/"))
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
archive = f"/tmp/{dir_name}_{timestamp}.tar.gz"
# Compress
print(f"Compressing {source_dir}...")
with tarfile.open(archive, "w:gz") as tar:
tar.add(source_dir, arcname=dir_name)
size_mb = os.path.getsize(archive) / 1024 / 1024
# Calculate checksum
md5 = hashlib.md5()
with open(archive, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
md5.update(chunk)
checksum = md5.hexdigest()
# Upload
s3_key = f"{dir_name}/{os.path.basename(archive)}"
print(f"Uploading {size_mb:.1f} MB to {bucket}/{s3_key}...")
self.s3.upload_file(archive, bucket, s3_key, ExtraArgs={
"Metadata": {"checksum-md5": checksum, "source": source_dir}
})
# Verify
response = self.s3.head_object(Bucket=bucket, Key=s3_key)
remote_size = response["ContentLength"] / 1024 / 1024
print(f"Verified: {remote_size:.1f} MB uploaded (md5: {checksum[:8]}...)")
# Cleanup local
os.remove(archive)
# Rotate old backups
self._rotate(bucket, f"{dir_name}/", keep_last)
return s3_key
def backup_postgres(self, db_name: str, bucket: str, keep_last: int = 14):
"""Dump and upload a PostgreSQL database."""
self.ensure_bucket(bucket)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
dump_file = f"/tmp/{db_name}_{timestamp}.sql.gz"
print(f"Dumping database {db_name}...")
        # Run the dump through bash with pipefail so a pg_dump failure
        # isn't masked by gzip succeeding at the end of the pipe.
        result = subprocess.run(
            ["bash", "-c", f"set -o pipefail; pg_dump {db_name} | gzip > {dump_file}"],
            capture_output=True, text=True,
        )
if result.returncode != 0:
print(f"pg_dump failed: {result.stderr}")
return None
size_mb = os.path.getsize(dump_file) / 1024 / 1024
s3_key = f"postgres/{db_name}/{os.path.basename(dump_file)}"
print(f"Uploading {size_mb:.1f} MB to {bucket}/{s3_key}...")
self.s3.upload_file(dump_file, bucket, s3_key)
os.remove(dump_file)
self._rotate(bucket, f"postgres/{db_name}/", keep_last)
print(f"Database backup complete: {s3_key}")
return s3_key
def backup_mysql(self, db_name: str, bucket: str, user="root",
password="", keep_last: int = 14):
"""Dump and upload a MySQL database."""
self.ensure_bucket(bucket)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
dump_file = f"/tmp/{db_name}_{timestamp}.sql.gz"
cmd = f"mysqldump -u{user}"
if password:
cmd += f" -p{password}"
cmd += f" {db_name} | gzip > {dump_file}"
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if result.returncode != 0:
print(f"mysqldump failed: {result.stderr}")
return None
size_mb = os.path.getsize(dump_file) / 1024 / 1024
s3_key = f"mysql/{db_name}/{os.path.basename(dump_file)}"
self.s3.upload_file(dump_file, bucket, s3_key)
os.remove(dump_file)
self._rotate(bucket, f"mysql/{db_name}/", keep_last)
print(f"MySQL backup complete: {s3_key} ({size_mb:.1f} MB)")
return s3_key
def list_backups(self, bucket: str, prefix: str = ""):
"""List all backups."""
response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
for obj in response.get("Contents", []):
size_mb = obj["Size"] / 1024 / 1024
print(f" {obj['Key']:50s} | {size_mb:>8.1f} MB | {obj['LastModified']}")
def restore_latest(self, bucket: str, prefix: str, dest_dir: str):
"""Download and extract the latest backup."""
response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
objects = sorted(response.get("Contents", []), key=lambda x: x["LastModified"])
if not objects:
print(f"No backups found with prefix: {prefix}")
return None
latest = objects[-1]
local_file = f"/tmp/{os.path.basename(latest['Key'])}"
print(f"Downloading {latest['Key']}...")
self.s3.download_file(bucket, latest["Key"], local_file)
print(f"Extracting to {dest_dir}...")
os.makedirs(dest_dir, exist_ok=True)
        with tarfile.open(local_file, "r:gz") as tar:
            # filter="data" blocks path-traversal entries (Python 3.12+,
            # also backported to recent security releases of 3.8-3.11).
            tar.extractall(dest_dir, filter="data")
os.remove(local_file)
print(f"Restored to {dest_dir}")
return dest_dir
    def _rotate(self, bucket: str, prefix: str, keep: int):
        """Delete old backups, keeping only the last N."""
        # Paginate: list_objects_v2 returns at most 1,000 keys per call.
        paginator = self.s3.get_paginator("list_objects_v2")
        objects = []
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            objects.extend(page.get("Contents", []))
        objects.sort(key=lambda x: x["LastModified"])
        if len(objects) > keep:
            for obj in objects[:-keep]:
                self.s3.delete_object(Bucket=bucket, Key=obj["Key"])
                print(f"  Rotated: {obj['Key']}")
# Usage: backup.py [files|db|config|all]
if __name__ == "__main__":
    backup = MinIOBackup()
    target = sys.argv[1] if len(sys.argv) > 1 else "all"
    if target in ("files", "all"):
        # Backup a project directory
        backup.backup_directory("/home/user/myproject", "backups")
    if target in ("db", "all"):
        # Backup PostgreSQL
        backup.backup_postgres("myapp_production", "backups")
    if target in ("config", "all"):
        # Backup config files (adjust the path to what you actually need)
        backup.backup_directory("/etc/nginx", "backups")
    # List all backups
    backup.list_backups("backups")
    # Restore latest
    # backup.restore_latest("backups", "myproject/", "/tmp/restore")
Cron Setup: Automatic Daily Backups
# Edit crontab
crontab -e
# Add these lines:
# Database backup at 1:00 AM
0 1 * * * /usr/bin/python3 /opt/scripts/backup.py db >> /var/log/backup.log 2>&1
# Project files at 2:00 AM
0 2 * * * /usr/bin/python3 /opt/scripts/backup.py files >> /var/log/backup.log 2>&1
# Config files at 3:00 AM
0 3 * * * /usr/bin/python3 /opt/scripts/backup.py config >> /var/log/backup.log 2>&1
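If a backup ever runs longer than its slot, cron will happily start a second copy on top of it. One simple guard on Linux hosts is to wrap each job in flock (part of util-linux on most distributions):

# Skip the run if the previous one still holds the lock
0 1 * * * flock -n /var/lock/backup-db.lock /usr/bin/python3 /opt/scripts/backup.py db >> /var/log/backup.log 2>&1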
Monitoring Backup Health
# Assumes MinIOBackup from the script above is importable.
from datetime import datetime, timedelta
def check_backup_health(backup: MinIOBackup, bucket: str, prefix: str,
max_age_hours: int = 25):
"""Alert if backup is too old or too small."""
response = backup.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
objects = sorted(response.get("Contents", []), key=lambda x: x["LastModified"])
if not objects:
return {"status": "CRITICAL", "message": f"No backups found for {prefix}"}
latest = objects[-1]
age = datetime.now(latest["LastModified"].tzinfo) - latest["LastModified"]
size_mb = latest["Size"] / 1024 / 1024
if age > timedelta(hours=max_age_hours):
return {"status": "WARNING", "message": f"Latest backup is {age.total_seconds()/3600:.1f}h old"}
if size_mb < 0.001:
return {"status": "CRITICAL", "message": f"Latest backup is suspiciously small: {size_mb:.4f} MB"}
return {
"status": "OK",
"message": f"Latest: {latest['Key']} ({size_mb:.1f} MB, {age.total_seconds()/3600:.1f}h ago)"
}
# Check all backup sets
for prefix in ["myproject/", "postgres/myapp_production/", "configs/"]:
result = check_backup_health(MinIOBackup(), "backups", prefix)
print(f"[{result['status']:8s}] {result['message']}")
Multi-Server Backup Architecture
┌──────────┐ ┌──────────┐ ┌──────────┐
│ Server 1 │ │ Server 2 │ │ Server 3 │
│ (app) │ │ (db) │ │ (files) │
└────┬─────┘ └────┬─────┘ └────┬─────┘
│ │ │
└────────┬───────┴────────┬───────┘
│ │
┌────▼────┐ ┌─────▼─────┐
│ MinIO │ │ MinIO │
│ Primary │────▶│ Replica │
│ (SSD) │ │ (offsite) │
└─────────┘ └───────────┘
MinIO supports replication between deployments. Once configured, your primary backup server syncs each bucket to an offsite replica automatically.
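Bucket replication requires versioning on both the source and destination buckets. Versioning is part of the standard S3 API, so plain boto3 can enable it; the replication link itself is set up with MinIO's mc client (mc replicate add). A minimal sketch, reusing the MinIOBackup class from above:

# Enable versioning on the backup bucket (a replication prerequisite).
# Note: versioning needs a reasonably recent MinIO release.
backup = MinIOBackup()
backup.s3.put_bucket_versioning(
    Bucket="backups",
    VersioningConfiguration={"Status": "Enabled"},
)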
Storage Cost Comparison
For 500GB of backup data:
| Solution | Monthly Cost |
|---|---|
| AWS S3 Standard | ~$11.50 + request fees |
| AWS S3 Glacier | ~$2.00 + retrieval fees |
| Google Cloud Storage | ~$10.00 |
| MinIO (self-hosted) | $0 (your hardware) |
| MinIO (cheap VPS + disk) | ~$5-10 (Hetzner, OVH) |
For terabytes of data, MinIO on a dedicated server saves hundreds per month.
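The S3 Standard figure is just 500 GB times the ~$0.023/GB monthly rate; scaling it up shows where self-hosting pays off. A back-of-envelope sketch (approximate us-east-1 pricing, excluding request and egress fees):

# Monthly S3 Standard storage cost at ~$0.023/GB
RATE_PER_GB = 0.023
for gb in (500, 2_000, 10_000):
    print(f"{gb:>6} GB -> ${gb * RATE_PER_GB:,.2f}/month")
# 500 GB -> $11.50/month; 10 TB -> $230.00/month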
Summary
MinIO + Python + cron = automated backup system that:
- Costs nothing beyond hardware
- Uses the same S3 API you already know
- Compresses, uploads, verifies, and rotates automatically
- Works with any database (PostgreSQL, MySQL, MongoDB)
- Scales from a Raspberry Pi to a data center
Stop paying cloud storage fees for backups. Run MinIO.
Need a custom automation or scraping tool? 79+ production actors on Apify Store. Questions or custom work? Email spinov001@gmail.com — more tips at t.me/scraping_ai