Automatically create (incremental) backups of zfs snapshots on a file server.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

233 lines
8.3 KiB

#!/usr/local/bin/python
# simple python module to create incremental backups of zfs snapshots from
# elab file systems
5 years ago
import pathlib
import subprocess
# private key file handed to `ssh -i` when talking to the backup server
SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
# user@host of the remote backup server
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"
# directory on the remote server, contains one sub directory per elab member
REMOTE_PATH = "zfs-backups"
# scp style url of REMOTE_PATH below the remote user's home directory
# NOTE(review): not referenced by the code visible here, presumably meant as
# the upload target for the generated backup files - confirm
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"
# name of the zfs pool whose snapshots are listed
ZFS_POOL = "Datenspeicher"
# dataset names starting with this prefix are treated as elab file systems
ZFS_ELAB_PREFIX = "elabfs-"
# local folder where the gzipped zfs streams are written to
TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"
def call(arguments, as_text=False):
    """ run a command and hand back what it wrote to stdout

    thin wrapper around subprocess.run() with some sensible defaults

    :params arguments: list of command line arguments and parameters
    :params as_text: should the output be treated as text
    :returns: bytes, or a string if as_text is trueish
    :raises subprocess.CalledProcessError: if the exit value is not 0
    """
    # only stdout is captured, stderr is not redirected
    run_options = {
        "check": True,
        "stdout": subprocess.PIPE,
        "universal_newlines": as_text,
    }
    return subprocess.run(arguments, **run_options).stdout
def remote_call(arguments):
    """ run a command on the remote backup server via ssh

    The command is executed as SSH_REMOTE, authenticated with SSH_KEY_FILE.

    :params arguments: list of command line arguments and parameters
    :returns: string of the command output
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE, *arguments]
    try:
        # the output must be returned, callers split it into file names
        return call(cmd, as_text=True)
    except subprocess.CalledProcessError as e:
        # e.stderr is only filled if the stderr of the command was captured
        print("STDERR:", e.stderr)
        raise
5 years ago
def clean_split(text):
    """ split a text on whitespace and return only the nonempty items

    :params text: string to split
    :returns: list of nonempty items without surrounding whitespace
    """
    # split() without a separator already drops empty items and never leaves
    # whitespace on the parts, no further stripping or filtering is required
    return text.split()
def list_snapshots():
    """ list the zfs snapshots available in ZFS_POOL

    :returns: list of full snapshot names
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    # -H: no header line, -o name: only the name column, -r: recursive
    zfs_list = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r"]
    output = call(zfs_list + [ZFS_POOL], as_text=True)
    return clean_split(output)
5 years ago
def list_elab_snapshots():
    """ lists the zfs snapshots for elab file systems

    Only snapshots of datasets whose name (without the pool) starts with
    ZFS_ELAB_PREFIX are considered, everything else is skipped.

    :returns: dict of snapshot lists with the elab member as key
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    result = {}
    for snapshot in list_snapshots():
        # snapshots of the pool's root dataset carry no "/" in their name,
        # they can't be elab file systems and must not break the listing
        _pool, separator, snap_name = snapshot.partition("/")
        if not separator or not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        prefix_and_member = snap_name.rsplit("@", 1)[0]
        # cut off only the leading prefix, the member name itself might
        # contain the prefix text again
        member = prefix_and_member[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result
def snapshot_short_name(full_snapshot_name):
    """ extract the short name of a snapshot from the full reference

    e.g. "Datenspeicher/elabfs-LukasMetzler@auto-20190806.0200-1w" would be
    translated to "elabfs-LukasMetzler@auto-20190806.0200-1w"

    :params full_snapshot_name: string of the full snapshot reference
    :returns: snapshot name without pool identification
    :raises ValueError: if the reference does not contain a "/"
    """
    # everything up to the first "/" is the pool, the remainder is kept
    _pool, short_name = full_snapshot_name.split("/", 1)
    return short_name
5 years ago
5 years ago
def backup_filename(current, last=None):
    """ build the filename for a full or an incremental backup

    Without a reference to the last backup the name has the format of a
    full backup ("<current>.gz"). If the last backed up snapshot is provided,
    the name has the format of an incremental backup
    ("<last>.to.<current>.gz").

    :params current: full reference to the current zfs snapshot
    :params last: (optional) full reference to the last backed up zfs snapshot
    :returns: filename of the backup file
    """
    current_name = snapshot_short_name(current)
    if last is not None:
        # incremental backup: both snapshot names end up in the filename
        last_name = snapshot_short_name(last)
        return f"{last_name}.to.{current_name}.gz"
    return f"{current_name}.gz"
def extract_snapshot_name(filename):
    """ returns the target snapshot name from a backup filename

    Two filename formats are understood:
    full backup:        elabfs-...@auto-20190807.0200-1w.gz
    incremental backup: elabfs-...@<old>.to.elabfs-...@<new>.gz

    The format is recognized by the ".to." separator and the number of "@"
    signs. The number of dots in the snapshot names is not taken into account,
    so names with no or with several dots are accepted as well.

    :params filename: name of a backup file
    :returns: short name of the backed up zfs snapshot
    :raises: ValueError if format of filename does not match
    """
    if not filename.endswith(".gz"):
        raise ValueError(f"Not a gzip file: {filename}")
    if not filename.startswith(ZFS_ELAB_PREFIX):
        raise ValueError(f"Not an elabfs snapshot: {filename}")
    snapshot = filename[: -len(".gz")]
    separators = snapshot.count(".to.")
    snapshot_signs = snapshot.count("@")
    if separators == 0 and snapshot_signs == 1:
        # full backup, the filename is the snapshot name
        return snapshot
    if separators == 1 and snapshot_signs == 2:
        # incremental backup, the part after the separator is the target
        _old_snapshot, new_snapshot = snapshot.split(".to.")
        return new_snapshot
    raise ValueError(f"Unknown Filename Format: {filename}")
5 years ago
def list_remote_backups(members):
    """ list the backed up snapshots on the remote server by elab member

    Only the sub directories of the supplied members are queried. Backups of
    members not in the list that are still on the backup server are ignored.
    If listing a member's directory fails, the directory is created and the
    member gets an empty set.

    :params members: list of elab members that have zfs snapshots to backup
    :returns: dict of sets with backed up snapshot names for each elab member
    :raises subprocess.CalledProcessError: if creating a directory fails
    :raises ValueError: if a remote file name has an unknown format
    """
    backups_by_member = {}
    for member in members:
        remote_sub_dir = f"{REMOTE_PATH}/{member}"
        try:
            listing = remote_call(["ls", remote_sub_dir])
            file_names = clean_split(listing)
            backups_by_member[member] = {
                extract_snapshot_name(name) for name in file_names
            }
        except subprocess.CalledProcessError:
            # any failure of "ls" is handled as a missing directory
            remote_call(["mkdir", remote_sub_dir])
            backups_by_member[member] = set()
    return backups_by_member
5 years ago
def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """ backup the latest zfs snapshot for an elab member

    This will try to create an incremental backup but will fall back to a full
    backup if it is not possible. The zfs stream is piped through gzip into a
    file in TMP_BACKUP_FOLDER. The function returns only after gzip has
    finished writing that file.

    :params member: name of the elab member
    :params elab_snapshots: list of currently available snapshots for the
        member, must not be empty
    :params existing_backups: set of available backup names
    :raises subprocess.CalledProcessError: if zfs send or gzip has not an exit
        value of 0
    """
    print(f"backing up member {member}")
    # newest first, relies on the snapshot names sorting chronologically
    snapshots = sorted(elab_snapshots, reverse=True)
    current_snapshot = snapshots[0]

    # the newest snapshot that is already backed up is the incremental base
    latest_backup = None
    for snapshot in snapshots:
        if snapshot_short_name(snapshot) in existing_backups:
            latest_backup = snapshot
            break

    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    elif latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        gzip_tmp_filename = backup_filename(current_snapshot)
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        gzip_tmp_filename = backup_filename(
            current=current_snapshot, last=latest_backup
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]

    # create the backup
    tmp_gzip_filepath = pathlib.Path(TMP_BACKUP_FOLDER) / gzip_tmp_filename
    print(f" - generating temporary backup file {tmp_gzip_filepath.name}")
    with open(tmp_gzip_filepath, "wb") as file_handle:
        gzip_process = subprocess.Popen(
            "gzip", stdin=subprocess.PIPE, stdout=file_handle
        )
        try:
            subprocess.run(send_cmd, stdout=gzip_process.stdin, check=True)
        finally:
            # gzip only terminates after its input is closed; wait for it so
            # the file is complete and no zombie process is left behind
            gzip_process.stdin.close()
            gzip_exit_value = gzip_process.wait()
    if gzip_exit_value != 0:
        raise subprocess.CalledProcessError(gzip_exit_value, "gzip")
5 years ago
def create_backups():
    """ batch create backups for all available elab snapshots

    Collects the local elab snapshots and the backups already present on the
    remote server, then processes one member after the other.
    """
    snapshots_by_member = list_elab_snapshots()
    backups_by_member = list_remote_backups(snapshots_by_member.keys())
    for member in snapshots_by_member:
        members_snapshots = snapshots_by_member[member]
        members_backups = backups_by_member.get(member, [])
        backup_latest_snapshot(member, members_snapshots, members_backups)
5 years ago
if __name__ == "__main__":
    # start the batch backup only if the file is executed as a script,
    # importing the module has no side effects
    create_backups()