#!/usr/local/bin/python
"""Back up the latest ZFS snapshot of every elabfs dataset to a remote host.

For each dataset named ``<ZFS_POOL>/<ZFS_ELAB_PREFIX><member>`` the newest
snapshot is sent with ``zfs send``, compressed with ``gzip`` into a temporary
file and copied with ``scp`` into ``~/<REMOTE_PATH>/<member>/`` on the remote.
If an older snapshot of the member is already present remotely, only an
incremental stream is sent; otherwise a full stream is sent.
"""

import pathlib
import subprocess

SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"
REMOTE_PATH = "zfs-backups"
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"

ZFS_POOL = "Datenspeicher"
ZFS_ELAB_PREFIX = "elabfs-"

TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"


def call(arguments, as_text=False):
    """Run a command and return its captured stdout.

    Args:
        arguments: The command and its arguments as a list.
        as_text: Return stdout as ``str`` instead of ``bytes``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    result = subprocess.run(
        arguments,
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=as_text,
    )
    return result.stdout


def remote_call(arguments):
    """Run a command on the backup host via ssh and return its stdout text.

    Raises:
        subprocess.CalledProcessError: If ssh or the remote command fails.
    """
    cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE]
    cmd.extend(arguments)
    return call(cmd, as_text=True)


def clean_split(text):
    """Split text on any whitespace and return the non-empty items."""
    # str.split() without arguments already strips the items and drops
    # empty ones.
    return text.split()


def list_snapshots():
    """Return the full names of all snapshots below ``ZFS_POOL``."""
    cmd = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r", ZFS_POOL]
    return clean_split(call(cmd, as_text=True))


def list_elab_snapshots():
    """Group the elabfs snapshots by member name.

    Returns:
        A dict mapping a member name to the list of full snapshot names
        (``pool/elabfs-member@snapshot``) of that member's dataset.
    """
    result = {}
    for snapshot in list_snapshots():
        # Snapshots taken on the pool itself ("pool@snap") contain no "/";
        # partition() leaves snap_name empty for them so they are skipped
        # instead of raising on unpacking.
        _, _, snap_name = snapshot.partition("/")
        if not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        prefix_and_member = snap_name.rpartition("@")[0]
        # Strip only the leading prefix; the member name itself might
        # contain the prefix text again.
        member = prefix_and_member[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result


def snapshot_short_name(full_snapshot_name):
    """Return the snapshot name without the leading pool component.

    Raises:
        ValueError: If the name contains no ``/``.
    """
    pool, name = full_snapshot_name.split("/", 1)
    return name


def gzip_filename(current, last=None):
    """Return the archive file name for a backup of ``current``.

    Args:
        current: Full name of the snapshot that is backed up.
        last: Full name of the snapshot the incremental stream starts
            from, or ``None`` for a full backup.

    Returns:
        ``<current>.gz`` for a full backup or ``<last>.to.<current>.gz``
        for an incremental one, both using the short snapshot names.
    """
    current_name = snapshot_short_name(current)
    if last is None:
        return f"{current_name}.gz"
    last_name = snapshot_short_name(last)
    return f"{last_name}.to.{current_name}.gz"


def extract_snapshot_name(filename):
    """Return the short name of the snapshot an archive file brings you to.

    Accepts the two formats produced by ``gzip_filename``; for an
    incremental archive the newer snapshot is returned.

    Raises:
        ValueError: If the file name is not a recognised elabfs archive.
    """
    # elabfs-LukasMetzler@auto-20190807.0200-1w.gz
    if not filename.endswith(".gz"):
        raise ValueError(f"Not a gzip file: {filename}")
    if not filename.startswith(ZFS_ELAB_PREFIX):
        raise ValueError(f"Not an elabfs snapshot: {filename}")
    snapshot = filename[:-3]
    if snapshot.count(".") == 1 and snapshot.count("@") == 1:
        # elabfs-...@auto-20190807.0200-1w
        return snapshot
    if snapshot.count(".to.") == 1 and snapshot.count("@") == 2:
        # elabfs-...@auto-20190806.0200-1w.to.elabfs-...@auto-20190807.0200-1w
        old_snapshot, new_snapshot = snapshot.split(".to.")
        return new_snapshot
    raise ValueError(f"Unknown Filename Format: {filename}")


def list_remote_backups(members):
    """Return the snapshots already backed up on the remote, per member.

    A member directory that cannot be listed is assumed to be missing and
    is created.

    Args:
        members: Iterable of member names.

    Returns:
        A dict mapping each member to the set of short snapshot names
        found in its remote directory.

    Raises:
        ValueError: If a remote file name is not a recognised archive.
        subprocess.CalledProcessError: If creating a directory fails.
    """
    result = {}
    for member in members:
        remote_sub_dir = f"{REMOTE_PATH}/{member}"
        try:
            listing = remote_call(["ls", remote_sub_dir])
        except subprocess.CalledProcessError:
            remote_call(["mkdir", remote_sub_dir])
            result[member] = set()
        else:
            result[member] = {
                extract_snapshot_name(name) for name in clean_split(listing)
            }
    return result


def _write_compressed_stream(send_cmd, target_path):
    """Run ``send_cmd`` and store its gzip-compressed output in a file.

    Returns only after gzip has consumed the whole stream and exited, so
    the file is complete when this function returns.

    Raises:
        subprocess.CalledProcessError: If ``send_cmd`` or gzip fails.
    """
    with open(target_path, "wb") as file_handle:
        gzip_process = subprocess.Popen(
            ["gzip"], stdin=subprocess.PIPE, stdout=file_handle
        )
        try:
            subprocess.run(send_cmd, stdout=gzip_process.stdin, check=True)
        finally:
            # gzip only terminates once its stdin reaches EOF, so our end
            # of the pipe has to be closed before waiting for it.
            gzip_process.stdin.close()
            gzip_return_code = gzip_process.wait()
    if gzip_return_code != 0:
        raise subprocess.CalledProcessError(gzip_return_code, ["gzip"])


def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """Back up the newest snapshot of one member to the remote server.

    Args:
        member: Member name, used as the remote sub directory.
        elab_snapshots: Non-empty list of the member's full snapshot names.
        existing_backups: Short snapshot names already on the remote.

    Raises:
        subprocess.CalledProcessError: If zfs, gzip or scp fails.
    """
    print(f"backing up member {member}")
    # Newest first; relies on the snapshot names sorting chronologically.
    snapshots = sorted(elab_snapshots, reverse=True)
    current_snapshot = snapshots[0]

    # The newest local snapshot that already exists as a remote backup.
    latest_backup = None
    for snapshot in snapshots:
        if snapshot_short_name(snapshot) in existing_backups:
            latest_backup = snapshot
            break

    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    elif latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        gzip_tmp_filename = gzip_filename(current_snapshot)
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        gzip_tmp_filename = gzip_filename(
            current=current_snapshot, last=latest_backup
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]

    # create the backup
    tmp_gzip_filepath = pathlib.Path(TMP_BACKUP_FOLDER) / gzip_tmp_filename
    print(f" - generating temporary backup file {tmp_gzip_filepath.name}")
    _write_compressed_stream(send_cmd, tmp_gzip_filepath)

    # copy the backup to the remote server
    print(" - copying temporary backup file")
    remote_url = (
        f"{SSH_REMOTE}:~/{REMOTE_PATH}/{member}/{tmp_gzip_filepath.name}"
    )
    copy_cmd = ["scp", "-i", SSH_KEY_FILE, str(tmp_gzip_filepath), remote_url]
    call(copy_cmd)

    # remove the temporary file
    print(" - removing temporary backup file")
    tmp_gzip_filepath.unlink()


def create_backups():
    """Back up the latest snapshot of every elabfs member dataset."""
    elab_snapshots = list_elab_snapshots()
    existing_backups = list_remote_backups(elab_snapshots.keys())
    for member, snapshots in elab_snapshots.items():
        members_backups = existing_backups.get(member, [])
        backup_latest_snapshot(member, snapshots, members_backups)


if __name__ == "__main__":
    create_backups()