#!/usr/local/bin/python

# Simple Python module to create incremental backups of zfs snapshots from
# elab file systems.

import pathlib
import subprocess

# private key used to authenticate against the remote backup server
SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"
# directory on the remote server, relative to the remote user's home
REMOTE_PATH = "zfs-backups"
# NOTE(review): SCP_REMOTE_URL is not referenced anywhere in this file.
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"

ZFS_POOL = "Datenspeicher"
# datasets whose name starts with this prefix are treated as elab file systems
ZFS_ELAB_PREFIX = "elabfs-"

TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"


def call(arguments, as_text=False):
    """ run a command line argument

    Simple wrapper around subprocess.run() with some sensible defaults:
    the exit status is checked and stdout is captured. stderr is not
    captured and therefore goes to the stderr of this process.

    :params arguments: list of command line arguments and parameters
    :params as_text: should the output be treated as text
    :returns: bytes or string (if as_text is trueish)
    :raises subprocess.CalledProcessError: if command has not an exit value
        of 0
    """
    result = subprocess.run(
        arguments,
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=as_text,
    )
    return result.stdout


def remote_call(arguments):
    """ runs a command on the remote backup server

    The command is executed through ssh, authenticating with SSH_KEY_FILE.
    If the command fails, the command line is printed before the error is
    re-raised.

    :params arguments: list of command line arguments and parameters
    :returns: string of the command output
    :raises subprocess.CalledProcessError: if command has not an exit value
        of 0
    """
    cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE]
    cmd.extend(arguments)
    try:
        # the output must be handed back to the caller; without the return
        # every caller would receive None
        return call(cmd, as_text=True)
    except subprocess.CalledProcessError as e:
        print("CMD:", e.cmd)
        # call() does not capture stderr, so this attribute is None here;
        # the remote error text is written directly to our stderr instead
        print("STDERR:", e.stderr)
        raise


def clean_split(text):
    """ splits a text on whitespace, only returns nonempty items

    :params text: string to split
    :returns: list of nonempty items
    """
    # str.split() without arguments already splits on runs of whitespace and
    # never yields empty or whitespace-padded items
    return text.split()


def list_snapshots():
    """ lists the available zfs snapshots for the ZFS_POOL

    :returns: list of snapshots
    :raises subprocess.CalledProcessError: if command has not an exit value
        of 0
    """
    cmd = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r", ZFS_POOL]
    return clean_split(call(cmd, as_text=True))


def list_elab_snapshots(snapshots=None):
    """ lists the zfs snapshots for elab file systems

    Snapshots are grouped by elab member. The member is the dataset name
    with the leading ZFS_ELAB_PREFIX removed. Snapshots that do not belong
    to an elab dataset (including snapshots of the pool itself) are skipped.

    :params snapshots: (optional) list of full snapshot references to group;
        if omitted, the snapshots are queried with list_snapshots()
    :returns: dict of snapshot lists with the elab member as key
    :raises subprocess.CalledProcessError: if the snapshots have to be
        queried and the command has not an exit value of 0
    """
    if snapshots is None:
        snapshots = list_snapshots()

    result = {}
    for snapshot in snapshots:
        # "pool/dataset@snap"; a snapshot of the pool root has no "/" and
        # can not be an elab snapshot
        _, separator, snap_name = snapshot.partition("/")
        if not separator or not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        dataset, at_sign, _ = snap_name.rpartition("@")
        if not at_sign:
            # not a snapshot reference
            continue
        # only strip the leading prefix, the member name itself might
        # contain the prefix text again
        member = dataset[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result


def snapshot_short_name(full_snapshot_name):
    """ extracts the short name of a snapshot from the full reference

    e.g. "Datenspeicher/elabfs-LukasMetzler@auto-20190806.0200-1w" would be
    translated to "elabfs-LukasMetzler@auto-20190806.0200-1w"

    :params full_snapshot_name: string of the full snapshot reference
    :returns string: snapshot name without pool identification
    :raises ValueError: if the reference does not contain a "/"
    """
    _, name = full_snapshot_name.split("/", 1)
    return name


def backup_filename(current, last=None):
    """ returns the filename for a full or incremental backup

    If only the current snapshot is provided, the filename will be in the
    format for a full backup ("<current>.gz"). If the reference for the last
    backup is provided, the filename will be in the format of an incremental
    backup ("<last>.to.<current>.gz").

    :params current: full reference to the current zfs snapshot
    :params last: (optional) full reference to the last backed up zfs
        snapshot
    :returns: filename of the backup file
    """
    current_name = snapshot_short_name(current)
    if last is None:
        return f"{current_name}.gz"
    last_name = snapshot_short_name(last)
    return f"{last_name}.to.{current_name}.gz"


def extract_snapshot_name(filename):
    """ returns the target snapshot name from a backup filename

    :params filename: name of a backup file
    :returns: short name of the backed up zfs snapshot
    :raises ValueError: if format of filename does not match
    """
    if not filename.endswith(".gz"):
        raise ValueError(f"Not a gzip file: {filename}")
    if not filename.startswith(ZFS_ELAB_PREFIX):
        raise ValueError(f"Not an elabfs snapshot: {filename}")

    snapshot = filename[:-3]  # remove ".gz"
    if snapshot.count(".") == 1 and snapshot.count("@") == 1:
        # full backup:
        # elabfs-...@auto-20190807.0200-1w
        return snapshot
    if snapshot.count(".to.") == 1 and snapshot.count("@") == 2:
        # incremental backup:
        # elabfs-...@auto-20190806.0200-1w.to.elabfs-...@auto-20190807.0200-1w
        _, new_snapshot = snapshot.split(".to.")
        return new_snapshot
    raise ValueError(f"Unknown Filename Format: {filename}")


def list_remote_backups(members):
    """ list the available backup files by elab members

    The elab members for whom current zfs snapshots are available must be
    supplied, so only these get queried. If backups of snapshots from former
    members are still on the backup server, these will be ignored.

    If listing the remote directory of a member fails, the directory is
    created and the member is reported without any backups.

    :params members: list of elab members that have zfs snapshots to backup
    :returns: dict of set with backup entries for each elab member
    :raises subprocess.CalledProcessError: if creating a remote directory
        fails
    :raises ValueError: if a remote file name has an unknown format
    """
    result = {}
    for member in members:
        remote_sub_dir = f"{REMOTE_PATH}/{member}"
        try:
            listing = remote_call(["ls", remote_sub_dir])
        except subprocess.CalledProcessError:
            # the listing failed, most likely the directory is missing
            remote_call(["mkdir", remote_sub_dir])
            result[member] = set()
        else:
            result[member] = {
                extract_snapshot_name(name) for name in clean_split(listing)
            }
    return result


def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """ backup the latest zfs snapshot for an elab member

    This will try to create an incremental backup but will fall back to a
    full backup if it is not possible. The "latest" snapshot is the last one
    when the snapshot references are sorted by name.

    The gzipped stream is written to a file in TMP_BACKUP_FOLDER.
    NOTE(review): this function does not transfer the file to the remote
    server.

    :params member: name of the elab member
    :params elab_snapshots: list of currently available snapshots for the
        member
    :params existing_backups: set of available backup names
    :raises subprocess.CalledProcessError: if "zfs send" or "gzip" has not
        an exit value of 0
    """
    print(f"backing up member {member}")

    snapshots = sorted(elab_snapshots, reverse=True)
    if not snapshots:
        print(" - no snapshots available")
        return
    current_snapshot = snapshots[0]

    # newest snapshot that already exists as a backup
    latest_backup = None
    for snapshot in snapshots:
        if snapshot_short_name(snapshot) in existing_backups:
            latest_backup = snapshot
            break

    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    elif latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        gzip_tmp_filename = backup_filename(current_snapshot)
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        gzip_tmp_filename = backup_filename(
            current=current_snapshot, last=latest_backup
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]

    # create the backup
    tmp_gzip_filepath = pathlib.Path(TMP_BACKUP_FOLDER) / gzip_tmp_filename
    print(f" - generating temporary backup file {tmp_gzip_filepath.name}")
    with open(tmp_gzip_filepath, "wb") as file_handle:
        gzip_process = subprocess.Popen(
            ["gzip"], stdin=subprocess.PIPE, stdout=file_handle
        )
        try:
            subprocess.run(send_cmd, stdout=gzip_process.stdin, check=True)
        finally:
            # gzip only finishes after its input is closed; wait for it so
            # the file is complete before the function returns
            gzip_process.stdin.close()
            gzip_returncode = gzip_process.wait()
    if gzip_returncode != 0:
        raise subprocess.CalledProcessError(gzip_returncode, ["gzip"])


def create_backups():
    """ batch create backups for all available elab snapshots """
    elab_snapshots = list_elab_snapshots()
    existing_backups = list_remote_backups(elab_snapshots.keys())
    for member, snapshots in elab_snapshots.items():
        members_backups = existing_backups.get(member, set())
        backup_latest_snapshot(member, snapshots, members_backups)


if __name__ == "__main__":
    # run the batch backup function if the file is called
    create_backups()