From e74b873921d9fe32c4b5fa2d093091c2a7494729 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Thu, 8 Aug 2019 11:15:44 +0200 Subject: [PATCH] added documentation to source file --- zfs-snapshot-backup.py | 87 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/zfs-snapshot-backup.py b/zfs-snapshot-backup.py index d9e0e1a..5aa109c 100755 --- a/zfs-snapshot-backup.py +++ b/zfs-snapshot-backup.py @@ -1,4 +1,8 @@ #!/usr/local/bin/python + +# simple python module to create incremental backups of zfs snapshots from +# elab file systems + import pathlib import subprocess @@ -15,6 +19,15 @@ TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups" def call(arguments, as_text=False): + """ run a command line argument + + simple wrapper around subprocess.run() with some sensible defaults + + :params arguments: list of command line arguments and parameters + :params as_text: should the output be treated as text + :returns: bytes or string (if as_text is trueish) + :raises subprocess.CalledProcessError: if the command exits with a non-zero status + """ result = subprocess.run( arguments, check=True, @@ -25,22 +38,44 @@ def call(arguments, as_text=False): def remote_call(arguments): + """ runs a command on the remote backup server + + :params arguments: list of command line arguments and parameters + :returns: string of the command output + :raises subprocess.CalledProcessError: if the command exits with a non-zero status + """ cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE] cmd.extend(arguments) return call(cmd, as_text=True) def clean_split(text): + """ splits a text on whitespace, only returns nonempty items as list + + :params text: string to split + :returns: list of nonempty items + :raises AttributeError: if text does not provide a split() method + """ items = (item.strip() for item in text.split()) return [item for item in items if item] def list_snapshots(): + """ lists 
the available zfs snapshots for the ZFS_POOL + + :returns: list of snapshots + :raises subprocess.CalledProcessError: if the command exits with a non-zero status + """ cmd = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r", ZFS_POOL] return clean_split(call(cmd, as_text=True)) def list_elab_snapshots(): + """ lists the zfs snapshots for elab file systems + + :returns: dict of snapshot lists with the elab member as key + :raises subprocess.CalledProcessError: if the command exits with a non-zero status + """ result = {} for snapshot in list_snapshots(): pool, snap_name = snapshot.split("/", 1) @@ -54,11 +89,29 @@ def list_elab_snapshots(): def snapshot_short_name(full_snapshot_name): + """ extracts the short name of a snapshot from the full reference + + e.g. "Datenspeicher/elabfs-LukasMetzler@auto-20190806.0200-1w" would be + translated to "elabfs-LukasMetzler@auto-20190806.0200-1w" + + :params full_snapshot_name: string of the full snapshot reference + :returns: snapshot name without pool identification + """ pool, name = full_snapshot_name.split("/", 1) return name + +def backup_filename(current, last=None): + """ returns the filename for a full or incremental backup + + if only the current snapshot is provided, the filename will be in the + format for a full backup, if the reference for the last backup is provided, + the filename will be in the format of an incremental backup -def gzip_filename(current, last=None): + :params current: full reference to the current zfs snapshot + :params last: (optional) full reference to the last backed up zfs snapshot + :returns: filename of the backup file + """ current_name = snapshot_short_name(current) if last is None: return f"{current_name}.gz" @@ -68,7 +121,12 @@ def gzip_filename(current, last=None): def extract_snapshot_name(filename): - # elabfs-LukasMetzler@auto-20190807.0200-1w.gz + """ returns the target snapshot name from a backup filename + + :params filename: name of a backup file + :returns: short name of 
the backed up zfs snapshot + :raises: ValueError if format of filename does not match + """ if not filename.endswith(".gz"): raise ValueError(f"Not a gzip file: {filename}") if not filename.startswith(ZFS_ELAB_PREFIX): @@ -86,6 +144,15 @@ def extract_snapshot_name(filename): def list_remote_backups(members): + """ list the available backup files by elab members + + The elab members for whom current zfs snapshots are available must be + supplied, so only these get queried. If backups of snapshots from former + members are still on the backup server, these will be ignored. + + :params members: list of elab members that have zfs snapshots to backup + :returns: dict of set with backup entries for each elab member + """ result = {} for member in members: remote_sub_dir = f"{REMOTE_PATH}/{member}" @@ -99,6 +166,16 @@ def list_remote_backups(members): def backup_latest_snapshot(member, elab_snapshots, existing_backups): + """ backup the latest zfs snapshot for an elab member + + This will try to create an incremental backup but will fall back to a full + backup if it is not possible. 
+ + :params member: name of the elab member + :params elab_snapshots: list of currently available snapshots for the member + :params existing_backups: set of available backup names + + """ print(f"backing up member {member}") snapshots = sorted(elab_snapshots, reverse=True) current_snapshot = snapshots[0] @@ -114,14 +191,14 @@ def backup_latest_snapshot(member, elab_snapshots, existing_backups): elif latest_backup is None: # no snapshot was found in backups, make a full backup for consistency send_cmd = ["zfs", "send", current_snapshot] - gzip_tmp_filename = gzip_filename(current_snapshot) + gzip_tmp_filename = backup_filename(current_snapshot) print(f" - full backup, latest snapshot: {current_snapshot}") else: # make an incremental backup print( f" - incremental backup, from: {latest_backup} to: {current_snapshot}" ) - gzip_tmp_filename = gzip_filename( + gzip_tmp_filename = backup_filename( current=current_snapshot, last=latest_backup ) send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot] @@ -149,6 +226,7 @@ def backup_latest_snapshot(member, elab_snapshots, existing_backups): def create_backups(): + """ batch create backups for all available elab snapshots""" elab_snapshots = list_elab_snapshots() existing_backups = list_remote_backups(elab_snapshots.keys()) for member, snapshots in elab_snapshots.items(): @@ -157,4 +235,5 @@ def create_backups(): if __name__ == "__main__": + # run the batch backup function if the file is called create_backups()