From e8ad6ceba4a825848a5ac1b878ee5878a54a557b Mon Sep 17 00:00:00 2001 From: Elliott Shugerman Date: Fri, 29 Nov 2019 22:17:47 -0700 Subject: [PATCH] script cleanup, misc changes - --blobs by default - always drop and recreate schemas, specified by DROP_SCHEMAS --- README.md | 43 ++++++++++++++++++------- render.py | 2 +- src/10.Dockerfile | 26 ++++++++-------- src/11.Dockerfile | 26 ++++++++-------- src/12.Dockerfile | 26 ++++++++-------- src/9.Dockerfile | 26 ++++++++-------- src/backup.sh | 59 ++++++++++++++++++++++------------- src/install.sh | 5 ++- src/restore.sh | 76 +++++++++++++++++++++++++++------------------ src/run.sh | 8 ++--- template.Dockerfile | 26 ++++++++-------- 11 files changed, 188 insertions(+), 135 deletions(-) mode change 100644 => 100755 render.py diff --git a/README.md b/README.md index 2bb3918..5e984f8 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# Overview -This project provides Docker containers to backup/restore a PostgreSQL database to/from AWS S3 (or a compatible service like DigitalOcean Spaces). Both one-off and periodic/scheduled backups are supported. +# Introduction +This project provides Docker images to periodically backup a PostgreSQL database to AWS S3, and to restore from the backup as needed. # Credit where due This repository is a fork and re-structuring of schickling's [postgres-backup-s3](https://github.com/schickling/dockerfiles/tree/master/postgres-backup-s3) and [postgres-restore-s3](https://github.com/schickling/dockerfiles/tree/master/postgres-restore-s3). @@ -8,8 +8,16 @@ Fork goals: - [x] dedicated repository - [x] automated builds - [x] support multiple PostgreSQL versions - - [ ] support encrypted (password-protected) backups - [x] merge backup and restore images? 
+ - [x] support encrypted (password-protected) backups + - [x] option to restore from specific backup by timestamp + +Other changes: + - uses `pg_dump`'s `custom` format (see [docs](https://www.postgresql.org/docs/10/app-pgdump.html)) + - backup blobs and all schemas by default + - recreate all database objects on restore + - some env vars renamed + - only scheduled backups supported, not ad-hoc ------- @@ -17,15 +25,16 @@ Fork goals: ## Backup ```yaml postgres: - image: postgres + image: postgres:11 environment: POSTGRES_USER: user POSTGRES_PASSWORD: password pgbackups3: - image: eeshugerman/postgres-backup-s3 + image: eeshugerman/postgres-backup-s3:11 environment: SCHEDULE: '@daily' + PASSPHRASE: passphrase S3_REGION: region S3_ACCESS_KEY_ID: key S3_SECRET_ACCESS_KEY: secret @@ -34,17 +43,27 @@ pgbackups3: POSTGRES_DATABASE: dbname POSTGRES_USER: user POSTGRES_PASSWORD: password - POSTGRES_EXTRA_OPTS: '--schema=public --blobs' ``` ### Notes -#### Periodic backups -The `SCHEDULE` variable is determines backup frequency. It is optional -- without it, the backup will run once at start up. More information about the scheduling can be found [here](http://godoc.org/github.com/robfig/cron#hdr-Predefined_schedules). - -#### Docker -Docker Compose is by no means required, you can use plain ol' Docker too -- just set the required env vars with the `-e` flag. +#### PostgreSQL version +Images are tagged by the major PostgreSQL version they support: `9`, `10`, `11`, or `12`. +#### Scheduling +The `SCHEDULE` variable determines backup frequency. See go-cron schedules documentation [here](http://godoc.org/github.com/robfig/cron#hdr-Predefined_schedules). +#### Encrypted backups +If `PASSPHRASE` is provided, the backup will be encrypted using GPG. ## Restore -With the container running, +> WARNING: DATA LOSS! All database objects will be dropped and recreated.
+ +### From latest backup (based on unix sort) ```sh docker exec <container name> sh restore.sh ``` +#### Notes +- If your bucket has more than 1000 files, the latest may not be restored, because only one S3 `ls` command is used +- Your S3 prefix should only contain backups which you wish to restore - 'latest' is determined based on unix sort with no filtering + +### From specific backup +```sh +docker exec <container name> sh restore.sh <timestamp> +``` diff --git a/render.py b/render.py old mode 100644 new mode 100755 index adcc164..455b1aa --- a/render.py +++ b/render.py @@ -1,4 +1,4 @@ -#!/bin/python3 +#!/usr/bin/python3 VERSIONS = ( ('9', '3.6'), diff --git a/src/10.Dockerfile b/src/10.Dockerfile index f719863..6938a1b 100644 --- a/src/10.Dockerfile +++ b/src/10.Dockerfile @@ -6,21 +6,21 @@ FROM alpine:3.8 ADD install.sh install.sh RUN sh install.sh && rm install.sh -ENV POSTGRES_DATABASE **None** -ENV POSTGRES_HOST **None** +ENV POSTGRES_DATABASE '' +ENV POSTGRES_HOST '' ENV POSTGRES_PORT 5432 -ENV POSTGRES_USER **None** -ENV POSTGRES_PASSWORD **None** -ENV POSTGRES_EXTRA_OPTS '' -ENV S3_ACCESS_KEY_ID **None** -ENV S3_SECRET_ACCESS_KEY **None** -ENV S3_BUCKET **None** -ENV S3_REGION us-west-1 +ENV POSTGRES_USER '' +ENV POSTGRES_PASSWORD '' +ENV PGDUMP_EXTRA_OPTS '' +ENV S3_ACCESS_KEY_ID '' +ENV S3_SECRET_ACCESS_KEY '' +ENV S3_BUCKET '' +ENV S3_REGION 'us-west-1' ENV S3_PATH 'backup' -ENV S3_ENDPOINT **None** -ENV S3_S3V4 no -ENV SCHEDULE **None** -ENV DROP_PUBLIC no +ENV S3_ENDPOINT '' +ENV S3_S3V4 'no' +ENV SCHEDULE '' +ENV PASSPHRASE '' ADD run.sh run.sh ADD backup.sh backup.sh diff --git a/src/11.Dockerfile b/src/11.Dockerfile index 2c51abc..4fc9334 100644 --- a/src/11.Dockerfile +++ b/src/11.Dockerfile @@ -6,21 +6,21 @@ FROM alpine:3.10 ADD install.sh install.sh RUN sh install.sh && rm install.sh -ENV POSTGRES_DATABASE **None** -ENV POSTGRES_HOST **None** +ENV POSTGRES_DATABASE '' +ENV POSTGRES_HOST '' ENV POSTGRES_PORT 5432 -ENV POSTGRES_USER **None** -ENV POSTGRES_PASSWORD **None** -ENV
POSTGRES_EXTRA_OPTS '' -ENV S3_ACCESS_KEY_ID **None** -ENV S3_SECRET_ACCESS_KEY **None** -ENV S3_BUCKET **None** -ENV S3_REGION us-west-1 +ENV POSTGRES_USER '' +ENV POSTGRES_PASSWORD '' +ENV PGDUMP_EXTRA_OPTS '' +ENV S3_ACCESS_KEY_ID '' +ENV S3_SECRET_ACCESS_KEY '' +ENV S3_BUCKET '' +ENV S3_REGION 'us-west-1' ENV S3_PATH 'backup' -ENV S3_ENDPOINT **None** -ENV S3_S3V4 no -ENV SCHEDULE **None** -ENV DROP_PUBLIC no +ENV S3_ENDPOINT '' +ENV S3_S3V4 'no' +ENV SCHEDULE '' +ENV PASSPHRASE '' ADD run.sh run.sh ADD backup.sh backup.sh diff --git a/src/12.Dockerfile b/src/12.Dockerfile index 29900eb..00382dc 100644 --- a/src/12.Dockerfile +++ b/src/12.Dockerfile @@ -6,21 +6,21 @@ FROM alpine:edge ADD install.sh install.sh RUN sh install.sh && rm install.sh -ENV POSTGRES_DATABASE **None** -ENV POSTGRES_HOST **None** +ENV POSTGRES_DATABASE '' +ENV POSTGRES_HOST '' ENV POSTGRES_PORT 5432 -ENV POSTGRES_USER **None** -ENV POSTGRES_PASSWORD **None** -ENV POSTGRES_EXTRA_OPTS '' -ENV S3_ACCESS_KEY_ID **None** -ENV S3_SECRET_ACCESS_KEY **None** -ENV S3_BUCKET **None** -ENV S3_REGION us-west-1 +ENV POSTGRES_USER '' +ENV POSTGRES_PASSWORD '' +ENV PGDUMP_EXTRA_OPTS '' +ENV S3_ACCESS_KEY_ID '' +ENV S3_SECRET_ACCESS_KEY '' +ENV S3_BUCKET '' +ENV S3_REGION 'us-west-1' ENV S3_PATH 'backup' -ENV S3_ENDPOINT **None** -ENV S3_S3V4 no -ENV SCHEDULE **None** -ENV DROP_PUBLIC no +ENV S3_ENDPOINT '' +ENV S3_S3V4 'no' +ENV SCHEDULE '' +ENV PASSPHRASE '' ADD run.sh run.sh ADD backup.sh backup.sh diff --git a/src/9.Dockerfile b/src/9.Dockerfile index 4a3ce3c..f3705e9 100644 --- a/src/9.Dockerfile +++ b/src/9.Dockerfile @@ -6,21 +6,21 @@ FROM alpine:3.6 ADD install.sh install.sh RUN sh install.sh && rm install.sh -ENV POSTGRES_DATABASE **None** -ENV POSTGRES_HOST **None** +ENV POSTGRES_DATABASE '' +ENV POSTGRES_HOST '' ENV POSTGRES_PORT 5432 -ENV POSTGRES_USER **None** -ENV POSTGRES_PASSWORD **None** -ENV POSTGRES_EXTRA_OPTS '' -ENV S3_ACCESS_KEY_ID **None** -ENV S3_SECRET_ACCESS_KEY **None** -ENV 
S3_BUCKET **None** -ENV S3_REGION us-west-1 +ENV POSTGRES_USER '' +ENV POSTGRES_PASSWORD '' +ENV PGDUMP_EXTRA_OPTS '' +ENV S3_ACCESS_KEY_ID '' +ENV S3_SECRET_ACCESS_KEY '' +ENV S3_BUCKET '' +ENV S3_REGION 'us-west-1' ENV S3_PATH 'backup' -ENV S3_ENDPOINT **None** -ENV S3_S3V4 no -ENV SCHEDULE **None** -ENV DROP_PUBLIC no +ENV S3_ENDPOINT '' +ENV S3_S3V4 'no' +ENV SCHEDULE '' +ENV PASSPHRASE '' ADD run.sh run.sh ADD backup.sh backup.sh diff --git a/src/backup.sh b/src/backup.sh index 6e5a7f0..e5a243d 100644 --- a/src/backup.sh +++ b/src/backup.sh @@ -1,30 +1,30 @@ #! /bin/sh -set -e +set -eu set -o pipefail -if [ "${S3_ACCESS_KEY_ID}" = "**None**" ]; then +if [ -z "$S3_ACCESS_KEY_ID" ]; then echo "You need to set the S3_ACCESS_KEY_ID environment variable." exit 1 fi -if [ "${S3_SECRET_ACCESS_KEY}" = "**None**" ]; then +if [ -z "$S3_SECRET_ACCESS_KEY" ]; then echo "You need to set the S3_SECRET_ACCESS_KEY environment variable." exit 1 fi -if [ "${S3_BUCKET}" = "**None**" ]; then +if [ -z "$S3_BUCKET" ]; then echo "You need to set the S3_BUCKET environment variable." exit 1 fi -if [ "${POSTGRES_DATABASE}" = "**None**" ]; then +if [ -z "$POSTGRES_DATABASE" ]; then echo "You need to set the POSTGRES_DATABASE environment variable." exit 1 fi -if [ "${POSTGRES_HOST}" = "**None**" ]; then - if [ -n "${POSTGRES_PORT_5432_TCP_ADDR}" ]; then +if [ -z "$POSTGRES_HOST" ]; then + if [ -n "${POSTGRES_PORT_5432_TCP_ADDR:-}" ]; then POSTGRES_HOST=$POSTGRES_PORT_5432_TCP_ADDR POSTGRES_PORT=$POSTGRES_PORT_5432_TCP_PORT else @@ -33,36 +33,53 @@ if [ "${POSTGRES_HOST}" = "**None**" ]; then fi fi -if [ "${POSTGRES_USER}" = "**None**" ]; then +if [ -z "$POSTGRES_USER" ]; then echo "You need to set the POSTGRES_USER environment variable." exit 1 fi -if [ "${POSTGRES_PASSWORD}" = "**None**" ]; then - echo "You need to set the POSTGRES_PASSWORD environment variable or link to a container named POSTGRES."
+if [ -z "$POSTGRES_PASSWORD" ]; then + echo "You need to set the POSTGRES_PASSWORD environment variable." exit 1 fi -if [ "${S3_ENDPOINT}" == "**None**" ]; then - AWS_ARGS="" +if [ -z "$S3_ENDPOINT" ]; then + aws_args="" else - AWS_ARGS="--endpoint-url ${S3_ENDPOINT}" + aws_args="--endpoint-url $S3_ENDPOINT" fi -# env vars needed for aws tools + export AWS_ACCESS_KEY_ID=$S3_ACCESS_KEY_ID export AWS_SECRET_ACCESS_KEY=$S3_SECRET_ACCESS_KEY export AWS_DEFAULT_REGION=$S3_REGION - export PGPASSWORD=$POSTGRES_PASSWORD -POSTGRES_HOST_OPTS="-h $POSTGRES_HOST -p $POSTGRES_PORT -U $POSTGRES_USER $POSTGRES_EXTRA_OPTS" -echo "Creating dump of ${POSTGRES_DATABASE} database from ${POSTGRES_HOST}..." +echo "Creating backup of $POSTGRES_DATABASE database..." +pg_dump --format=custom \ + -h "$POSTGRES_HOST" \ + -p "$POSTGRES_PORT" \ + -U "$POSTGRES_USER" \ + -d "$POSTGRES_DATABASE" \ + $PGDUMP_EXTRA_OPTS \ + > db.dump -pg_dump $POSTGRES_HOST_OPTS $POSTGRES_DATABASE | gzip > dump.sql.gz +timestamp=$(date +"%Y-%m-%dT%H:%M:%S") +s3_uri_base="s3://${S3_BUCKET}/${S3_PREFIX-$S3_PATH}/${POSTGRES_DATABASE}_${timestamp}.dump" -echo "Uploading dump to $S3_BUCKET" +if [ -n "$PASSPHRASE" ]; then + echo "Encrypting backup..." + gpg --symmetric --batch --passphrase "$PASSPHRASE" db.dump + rm db.dump + local_file="db.dump.gpg" + s3_uri="${s3_uri_base}.gpg" +else + local_file="db.dump" + s3_uri="$s3_uri_base" +fi -cat dump.sql.gz | aws $AWS_ARGS s3 cp - s3://$S3_BUCKET/$S3_PREFIX/${POSTGRES_DATABASE}_$(date +"%Y-%m-%dT%H:%M:%SZ").sql.gz || exit 2 +echo "Uploading backup to $S3_BUCKET..." +aws $aws_args s3 cp "$local_file" "$s3_uri" +rm "$local_file" -echo "SQL backup uploaded successfully" +echo "Backup complete." diff --git a/src/install.sh b/src/install.sh index 8d4f00a..3ae5fbb 100644 --- a/src/install.sh +++ b/src/install.sh @@ -1,13 +1,16 @@ #!
/bin/sh -# exit if a command fails set -e +set -o pipefail apk update # install pg_dump apk add postgresql-client +# install gpg +apk add gnupg + # install s3 tools apk add python py-pip pip install awscli diff --git a/src/restore.sh b/src/restore.sh index a433d7a..146a83d 100644 --- a/src/restore.sh +++ b/src/restore.sh @@ -1,30 +1,30 @@ #! /bin/sh -set -e +set -eu set -o pipefail -if [ "${S3_ACCESS_KEY_ID}" = "**None**" ]; then +if [ -z "$S3_ACCESS_KEY_ID" ]; then echo "You need to set the S3_ACCESS_KEY_ID environment variable." exit 1 fi -if [ "${S3_SECRET_ACCESS_KEY}" = "**None**" ]; then +if [ -z "$S3_SECRET_ACCESS_KEY" ]; then echo "You need to set the S3_SECRET_ACCESS_KEY environment variable." exit 1 fi -if [ "${S3_BUCKET}" = "**None**" ]; then +if [ -z "$S3_BUCKET" ]; then echo "You need to set the S3_BUCKET environment variable." exit 1 fi -if [ "${POSTGRES_DATABASE}" = "**None**" ]; then +if [ -z "$POSTGRES_DATABASE" ]; then echo "You need to set the POSTGRES_DATABASE environment variable." exit 1 fi -if [ "${POSTGRES_HOST}" = "**None**" ]; then - if [ -n "${POSTGRES_PORT_5432_TCP_ADDR}" ]; then +if [ -z "$POSTGRES_HOST" ]; then + if [ -n "${POSTGRES_PORT_5432_TCP_ADDR:-}" ]; then POSTGRES_HOST=$POSTGRES_PORT_5432_TCP_ADDR POSTGRES_PORT=$POSTGRES_PORT_5432_TCP_PORT else @@ -33,49 +33,63 @@ if [ "${POSTGRES_HOST}" = "**None**" ]; then fi fi -if [ "${POSTGRES_USER}" = "**None**" ]; then +if [ -z "$POSTGRES_USER" ]; then echo "You need to set the POSTGRES_USER environment variable." exit 1 fi -if [ "${POSTGRES_PASSWORD}" = "**None**" ]; then - echo "You need to set the POSTGRES_PASSWORD environment variable or link to a container named POSTGRES." +if [ -z "$POSTGRES_PASSWORD" ]; then + echo "You need to set the POSTGRES_PASSWORD environment variable" \ + "or link to a container named POSTGRES."
exit 1 fi -if [ "${S3_ENDPOINT}" == "**None**" ]; then - AWS_ARGS="" +if [ -z "$S3_ENDPOINT" ]; then + aws_args="" else - AWS_ARGS="--endpoint-url ${S3_ENDPOINT}" + aws_args="--endpoint-url $S3_ENDPOINT" fi -# env vars needed for aws tools + export AWS_ACCESS_KEY_ID=$S3_ACCESS_KEY_ID export AWS_SECRET_ACCESS_KEY=$S3_SECRET_ACCESS_KEY export AWS_DEFAULT_REGION=$S3_REGION - export PGPASSWORD=$POSTGRES_PASSWORD -POSTGRES_HOST_OPTS="-h $POSTGRES_HOST -p $POSTGRES_PORT -U $POSTGRES_USER" -echo "Finding latest backup" +s3_uri_base="s3://${S3_BUCKET}/${S3_PREFIX-$S3_PATH}" -LATEST_BACKUP=$(aws $AWS_ARGS s3 ls s3://$S3_BUCKET/$S3_PREFIX/ | sort | tail -n 1 | awk '{ print $4 }') - -echo "Fetching ${LATEST_BACKUP} from S3" - -aws $AWS_ARGS s3 cp s3://$S3_BUCKET/$S3_PREFIX/${LATEST_BACKUP} dump.sql.gz -gzip -d dump.sql.gz - -if [ "${DROP_PUBLIC}" == "yes" ]; then - echo "Recreating the public schema" - psql $POSTGRES_HOST_OPTS -d $POSTGRES_DATABASE -c "drop schema public cascade; create schema public;" +if [ -z "$PASSPHRASE" ]; then + file_type=".dump" +else + file_type=".dump.gpg" fi -echo "Restoring ${LATEST_BACKUP}" +if [ $# -eq 1 ]; then + timestamp="$1" + key_suffix="${POSTGRES_DATABASE}_${timestamp}${file_type}" +else + echo "Finding latest backup..." + key_suffix=$( + aws $aws_args s3 ls "${s3_uri_base}/" \ + | sort \ + | tail -n 1 \ + | awk '{ print $4 }' + ) +fi -psql $POSTGRES_HOST_OPTS -d $POSTGRES_DATABASE < dump.sql +echo "Fetching backup from S3..." +aws $aws_args s3 cp "${s3_uri_base}/${key_suffix}" "db${file_type}" -rm dump.sql +if [ -n "$PASSPHRASE" ]; then + echo "Decrypting backup..." + gpg --decrypt --batch --passphrase "$PASSPHRASE" db.dump.gpg > db.dump + rm db.dump.gpg +fi -echo "Restore complete" +conn_opts="-h $POSTGRES_HOST -p $POSTGRES_PORT -U $POSTGRES_USER -d $POSTGRES_DATABASE" +echo "Restoring from backup..." +pg_restore $conn_opts --single-transaction --clean --if-exists db.dump +rm db.dump + +echo "Restore complete."
diff --git a/src/run.sh b/src/run.sh index e3fb3c8..51b8617 100644 --- a/src/run.sh +++ b/src/run.sh @@ -1,13 +1,14 @@ #! /bin/sh -set -e +set -eu -if [ "${S3_S3V4}" = "yes" ]; then +if [ "$S3_S3V4" = "yes" ]; then aws configure set default.s3.signature_version s3v4 fi -if [ "${SCHEDULE}" = "**None**" ]; then - sh backup.sh +if [ -z "$SCHEDULE" ]; then + echo "You need to set the SCHEDULE environment variable." + exit 1 else exec go-cron "$SCHEDULE" /bin/sh backup.sh fi diff --git a/template.Dockerfile b/template.Dockerfile index 038dc1e..fd33ef8 100644 --- a/template.Dockerfile +++ b/template.Dockerfile @@ -3,21 +3,21 @@ FROM alpine:{alpine_version} ADD install.sh install.sh RUN sh install.sh && rm install.sh -ENV POSTGRES_DATABASE **None** -ENV POSTGRES_HOST **None** +ENV POSTGRES_DATABASE '' +ENV POSTGRES_HOST '' ENV POSTGRES_PORT 5432 -ENV POSTGRES_USER **None** -ENV POSTGRES_PASSWORD **None** -ENV POSTGRES_EXTRA_OPTS '' -ENV S3_ACCESS_KEY_ID **None** -ENV S3_SECRET_ACCESS_KEY **None** -ENV S3_BUCKET **None** -ENV S3_REGION us-west-1 +ENV POSTGRES_USER '' +ENV POSTGRES_PASSWORD '' +ENV PGDUMP_EXTRA_OPTS '' +ENV S3_ACCESS_KEY_ID '' +ENV S3_SECRET_ACCESS_KEY '' +ENV S3_BUCKET '' +ENV S3_REGION 'us-west-1' ENV S3_PATH 'backup' -ENV S3_ENDPOINT **None** -ENV S3_S3V4 no -ENV SCHEDULE **None** -ENV DROP_PUBLIC no +ENV S3_ENDPOINT '' +ENV S3_S3V4 'no' +ENV SCHEDULE '' +ENV PASSPHRASE '' ADD run.sh run.sh ADD backup.sh backup.sh