|
|
|
#!/usr/local/bin/python
|
|
|
|
|
|
|
|
# simple python module to create incremental backups of zfs snapshots from
|
|
|
|
# elab file systems
|
|
|
|
|
|
|
|
import pathlib
|
|
|
|
import subprocess
|
|
|
|
|
|
|
|
# ssh identity file and account used for every connection to the backup server
SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"

# folder on the backup server that holds one sub folder per elab member;
# SCP_REMOTE_URL addresses it below the remote home directory
REMOTE_PATH = "zfs-backups"
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"

# local zfs pool and the name prefix that marks a dataset as elab file system
ZFS_POOL = "Datenspeicher"
ZFS_ELAB_PREFIX = "elabfs-"

# local folder where a backup file is written before it is copied to the
# backup server
TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"
|
|
|
|
|
|
|
|
|
|
|
|
def call(arguments, as_text=False):
    """ execute an external command and hand back what it wrote to stdout

    thin convenience layer over subprocess.run(): stdout is captured and a
    non-zero exit status is turned into an exception

    :params arguments: list of command line arguments and parameters
    :params as_text: if trueish, stdout is decoded and returned as string
    :returns: the captured stdout, bytes or string (if as_text is trueish)
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    run_options = {
        "stdout": subprocess.PIPE,
        "universal_newlines": as_text,
        "check": True,
    }
    completed = subprocess.run(arguments, **run_options)
    return completed.stdout
|
|
|
|
|
|
|
|
|
|
|
|
def remote_call(arguments):
    """ execute a command on the remote backup server via ssh

    the command is prefixed with the ssh invocation for SSH_REMOTE, using
    SSH_KEY_FILE as identity file

    :params arguments: list of command line arguments and parameters
    :returns: string of the command output
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    ssh_command = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE, *arguments]
    return call(ssh_command, as_text=True)
|
|
|
|
|
|
|
|
|
|
|
|
def clean_split(text):
    """ splits a text on whitespace, only returns nonempty items

    str.split() without arguments splits on runs of arbitrary whitespace and
    ignores leading and trailing whitespace, so no empty or padded items can
    show up in the result

    :params text: string to split
    :returns: list of nonempty items
    """
    return text.split()
|
|
|
|
|
|
|
|
|
|
|
|
def list_snapshots():
    """ queries zfs for all snapshots below the ZFS_POOL

    :returns: list of snapshot names as printed by zfs
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    zfs_list_cmd = [
        "zfs",
        "list",
        "-t",
        "snapshot",  # restrict the listing to snapshots
        "-H",  # scripting mode, no header line
        "-o",
        "name",  # print only the name column
        "-r",
        ZFS_POOL,  # recurse through the whole pool
    ]
    listing = call(zfs_list_cmd, as_text=True)
    return clean_split(listing)
|
|
|
|
|
|
|
|
|
|
|
|
def list_elab_snapshots():
    """ lists the zfs snapshots for elab file systems

    Only snapshots of datasets whose name (without the pool) starts with
    ZFS_ELAB_PREFIX are considered. Snapshots of the pool's root dataset
    ("pool@snapshot", no "/" in the name) are skipped.

    :returns: dict of snapshot lists with the elab member as key
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    result = {}
    for snapshot in list_snapshots():
        # partition() instead of split(): a snapshot of the pool itself has
        # no "/" and must not abort the whole listing
        _pool, separator, snap_name = snapshot.partition("/")
        if not separator or not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        prefix_and_member, _ = snap_name.rsplit("@", 1)
        # cut off only the leading prefix, a member name may contain the
        # prefix text again
        member = prefix_and_member[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def snapshot_short_name(full_snapshot_name):
    """ strips the pool from a full snapshot reference

    e.g. "Datenspeicher/elabfs-LukasMetzler@auto-20190806.0200-1w" would be
    translated to "elabfs-LukasMetzler@auto-20190806.0200-1w"

    Only the first "/" is used as separator, everything after it is kept.

    :params full_snapshot_name: string of the full snapshot reference
    :returns string: snapshot name without pool identification
    :raises ValueError: if the reference does not contain a "/"
    """
    _pool, short_name = full_snapshot_name.split("/", 1)
    return short_name
|
|
|
|
|
|
|
|
:
|
|
|
|
def backup_filename(current, last=None):
    """ builds the name of the backup file for a snapshot

    Without a reference to the last backup the name has the format of a full
    backup: "<current>.gz". With a reference to the last backup the name has
    the format of an incremental backup: "<last>.to.<current>.gz". Both
    snapshot names are used without the pool.

    :params current: full reference to the current zfs snapshot
    :params last: (optional) full reference to the last backuped zfs snapshot
    :returns: filename of the backup file
    """
    target_name = snapshot_short_name(current)
    if last is not None:
        base_name = snapshot_short_name(last)
        return f"{base_name}.to.{target_name}.gz"
    return f"{target_name}.gz"
|
|
|
|
|
|
|
|
|
|
|
|
def extract_snapshot_name(filename):
    """ derives the backed up snapshot from the name of a backup file

    Two filename formats are recognized:
    full backup: "elabfs-...@auto-20190807.0200-1w.gz"
    incremental backup:
    "elabfs-...@auto-20190806.0200-1w.to.elabfs-...@auto-20190807.0200-1w.gz"

    :params filename: name of an backup file
    :returns: short name of the backed up zfs snapshot
    :raises: ValueError if format of filename does not match
    """
    if not filename.endswith(".gz"):
        raise ValueError(f"Not a gzip file: {filename}")
    if not filename.startswith(ZFS_ELAB_PREFIX):
        raise ValueError(f"Not an elabfs snapshot: {filename}")

    snapshot = filename[: -len(".gz")]
    at_signs = snapshot.count("@")

    # full backup: one snapshot reference with exactly one dot in it
    if at_signs == 1 and snapshot.count(".") == 1:
        return snapshot

    # incremental backup: two snapshot references joined by one ".to."
    if at_signs == 2 and snapshot.count(".to.") == 1:
        _previous, target = snapshot.split(".to.")
        return target

    raise ValueError(f"Unknown Filename Format: {filename}")
|
|
|
|
|
|
|
|
|
|
|
|
def list_remote_backups(members):
    """ collects the snapshots already stored on the backup server

    Only the folders of the supplied elab members are queried, so backups of
    former members that are still on the backup server are ignored. If
    listing the folder of a member fails, the folder is created on the
    backup server and the member is reported without any backups.

    :params members: list of elab members that have zfs snapshots to backup
    :returns: dict of set with backup entries for each elab member
    """
    backups_by_member = {}
    for member in members:
        member_dir = f"{REMOTE_PATH}/{member}"
        try:
            listing = remote_call(["ls", member_dir])
        except subprocess.CalledProcessError:
            # a failed listing is treated as a missing folder
            remote_call(["mkdir", member_dir])
            backups_by_member[member] = set()
        else:
            backups_by_member[member] = {
                extract_snapshot_name(name) for name in clean_split(listing)
            }
    return backups_by_member
|
|
|
|
|
|
|
|
|
|
|
|
def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """ backup the latest zfs snapshot for an elab member

    This will try to create an incremental backup but will fall back to a full
    backup if it is not possible.

    The snapshot stream is compressed with gzip into a file in
    TMP_BACKUP_FOLDER, copied with scp into the folder of the member on the
    backup server and removed locally afterwards. The local file is also
    removed if one of the steps fails.

    :params member: name of the elab member
    :params elab_snapshots: list of currently available snapshots for the member
    :params existing_backups: set of available backup names
    :raises subprocess.CalledProcessError: if zfs send, gzip or scp has not an
                                           exit value of 0
    """
    print(f"backing up member {member}")
    # newest first, relies on the snapshot names sorting chronologically
    snapshots = sorted(elab_snapshots, reverse=True)
    if not snapshots:
        print(" - nothing to backup, no snapshots available")
        return
    current_snapshot = snapshots[0]

    # newest local snapshot that is already present on the backup server
    latest_backup = next(
        (
            snapshot
            for snapshot in snapshots
            if snapshot_short_name(snapshot) in existing_backups
        ),
        None,
    )

    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    if latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        gzip_tmp_filename = backup_filename(current_snapshot)
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        gzip_tmp_filename = backup_filename(
            current=current_snapshot, last=latest_backup
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]

    tmp_gzip_filepath = pathlib.Path(TMP_BACKUP_FOLDER) / gzip_tmp_filename
    try:
        # create the backup
        print(f" - generating temporary backup file {tmp_gzip_filepath.name}")
        with open(tmp_gzip_filepath, "wb") as file_handle:
            # leaving the Popen context closes the stdin of gzip (so it sees
            # the end of the stream) and waits until gzip has written
            # everything, otherwise an incomplete file could be copied
            with subprocess.Popen(
                ["gzip"], stdin=subprocess.PIPE, stdout=file_handle
            ) as gzip_process:
                subprocess.run(send_cmd, stdout=gzip_process.stdin, check=True)
        if gzip_process.returncode != 0:
            raise subprocess.CalledProcessError(
                gzip_process.returncode, ["gzip"]
            )

        # copy the backup to the remote server
        print(" - copying temporary backup file")
        remote_url = (
            f"{SSH_REMOTE}:~/{REMOTE_PATH}/{member}/{tmp_gzip_filepath.name}"
        )
        copy_cmd = [
            "scp",
            "-i",
            SSH_KEY_FILE,
            str(tmp_gzip_filepath),
            remote_url,
        ]
        call(copy_cmd)
    finally:
        # remove the temporary file, also after a failure so no partial
        # backup files pile up in the temporary folder
        if tmp_gzip_filepath.exists():
            print(" - removing temporary backup file")
            tmp_gzip_filepath.unlink()
|
|
|
|
|
|
|
|
|
|
|
|
def create_backups():
    """ backs up the latest snapshot of every elab member that has snapshots

    The local elab snapshots are listed first, then the existing backups of
    the same members are fetched from the backup server and finally each
    member is processed one after the other.
    """
    snapshots_by_member = list_elab_snapshots()
    backups_by_member = list_remote_backups(snapshots_by_member.keys())
    for member in snapshots_by_member:
        backup_latest_snapshot(
            member,
            snapshots_by_member[member],
            backups_by_member.get(member, []),
        )
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # executed as a script (not imported): start the batch backup
    create_backups()
|