"""Back up the latest ZFS snapshot of every elab dataset to a remote host.

For each dataset named ``<ZFS_POOL>/<ZFS_ELAB_PREFIX><member>`` the newest
snapshot is streamed with ``zfs send``, gzip-compressed into a temporary file
and copied with ``scp`` into ``~/<REMOTE_PATH>/<member>/`` on the remote host.
"""

import pathlib
import subprocess

SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"
REMOTE_PATH = "zfs-backups"
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"
ZFS_POOL = "Datenspeicher"
ZFS_ELAB_PREFIX = "elabfs-"
TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"


def call(arguments, as_text=False):
    """Run a command and return its captured stdout.

    Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
    The output is ``str`` when *as_text* is true, ``bytes`` otherwise.
    """
    result = subprocess.run(
        arguments,
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=as_text,
    )
    return result.stdout


def remote_call(arguments):
    """Run a command on the remote host via ssh and return its text output."""
    cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE]
    cmd.extend(arguments)
    return call(cmd, as_text=True)


def clean_split(text):
    """Split *text* on whitespace, dropping empty items."""
    # str.split() without arguments already strips and skips empty fields.
    return text.split()


def list_snapshots():
    """Return the names of all snapshots below ``ZFS_POOL``."""
    cmd = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r", ZFS_POOL]
    return clean_split(call(cmd, as_text=True))


def group_by_member(snapshots):
    """Group full snapshot names by elab member.

    Only snapshots whose dataset path (the part after the pool name) starts
    with ``ZFS_ELAB_PREFIX`` are kept; the member is the dataset path with
    that leading prefix removed.  Returns ``{member: [snapshot, ...]}`` with
    the snapshots in input order.
    """
    result = {}
    for snapshot in snapshots:
        # partition() instead of split(): a snapshot of the pool root has no
        # "/" and must simply be skipped rather than raise ValueError.
        _pool, _, snap_name = snapshot.partition("/")
        if not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        dataset = snap_name.rsplit("@", 1)[0]
        # Strip only the leading prefix; str.replace() would also remove
        # later occurrences inside the member name.
        member = dataset[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result


def elab_snapshots():
    """Return the elab snapshots of the pool, grouped by member."""
    return group_by_member(list_snapshots())


def gzip_filename(name):
    """Return *name* with a ``.gz`` suffix appended."""
    return f"{name}.gz"


def gzip_filepath(name):
    """Return the temporary backup path for *name* in ``TMP_BACKUP_FOLDER``."""
    return pathlib.Path(TMP_BACKUP_FOLDER) / gzip_filename(name)


def list_remote_backups(members):
    """Return ``{member: set of file names}`` found on the remote host.

    If listing a member's remote directory fails, the directory is created
    and the member is reported with an empty set.
    """
    result = {}
    for member in members:
        remote_sub_dir = f"{REMOTE_PATH}/{member}"
        try:
            backups = clean_split(remote_call(["ls", remote_sub_dir]))
        except subprocess.CalledProcessError:
            # -p also creates missing parents and tolerates an existing dir
            remote_call(["mkdir", "-p", remote_sub_dir])
            backups = []
        result[member] = set(backups)
    return result


def _write_compressed_stream(send_cmd, target_path):
    """Pipe the output of *send_cmd* through gzip into *target_path*.

    Raises ``subprocess.CalledProcessError`` if either process fails.
    """
    with open(target_path, "wb") as file_handle:
        send_proc = subprocess.Popen(send_cmd, stdout=subprocess.PIPE)
        try:
            subprocess.run(
                ["gzip"],
                stdin=send_proc.stdout,
                stdout=file_handle,
                check=True,
            )
        finally:
            send_proc.stdout.close()
            send_return_code = send_proc.wait()
        # gzip succeeds on a truncated stream, so check the sender as well
        if send_return_code != 0:
            raise subprocess.CalledProcessError(send_return_code, send_cmd)


def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """Back up the newest snapshot of *member* to the remote host.

    *elab_snapshots* holds the member's full snapshot names and
    *existing_backups* the file names already present on the remote.
    A full stream is sent when none of the snapshots has a remote backup,
    otherwise an incremental stream starting at the newest backed-up one.
    """
    print(f"backing up member {member}")

    # NOTE(review): assumes snapshot names sort chronologically - confirm
    snapshots = sorted(elab_snapshots, reverse=True)
    if not snapshots:
        print(" - nothing to backup, no snapshots found")
        return
    current_snapshot = snapshots[0]

    # Remote files are stored under the base name of the temporary file
    # (without the pool directory), so compare against that name.
    latest_backup = None
    for snapshot in snapshots:
        if gzip_filepath(snapshot).name in existing_backups:
            latest_backup = snapshot
            break

    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    elif latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]

    tmp_gzip_filepath = gzip_filepath(current_snapshot)
    # the snapshot name contains the pool as a directory component
    tmp_gzip_filepath.parent.mkdir(parents=True, exist_ok=True)
    try:
        # create the backup
        print(" - generating temporary backup file")
        _write_compressed_stream(send_cmd, tmp_gzip_filepath)

        # copy the backup to the remote server
        print(" - copying temporary backup file")
        remote_url = f"{SCP_REMOTE_URL}{member}/{tmp_gzip_filepath.name}"
        copy_cmd = [
            "scp",
            "-i",
            SSH_KEY_FILE,
            str(tmp_gzip_filepath),
            remote_url,
        ]
        call(copy_cmd)
    finally:
        # remove the temporary file, also after a failed step
        if tmp_gzip_filepath.exists():
            print(" - removing temporary backup file")
            tmp_gzip_filepath.unlink()


def create_backups():
    """Back up the latest snapshot of every elab member."""
    snapshots_by_member = elab_snapshots()
    existing_backups = list_remote_backups(snapshots_by_member.keys())
    for member, snapshots in snapshots_by_member.items():
        members_backups = existing_backups.get(member, set())
        backup_latest_snapshot(member, snapshots, members_backups)


if __name__ == "__main__":
    es = elab_snapshots()
    print(list_remote_backups(es.keys()))