Automatically create (incremental) backups of zfs snapshots on a file server.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

228 lines
8.1 KiB

#!/usr/local/bin/python
# simple python module to create incremental backups of zfs snapshots from
# elab file systems
import pathlib
import subprocess
# --- local zfs source ------------------------------------------------------
# name of the zfs pool whose snapshots are listed
ZFS_POOL = "Datenspeicher"
# dataset name prefix marking an elab file system; the rest of the dataset
# name is used as the elab member name
ZFS_ELAB_PREFIX = "elabfs-"
# local folder the gzipped backup files are written to
TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"
# --- remote backup server --------------------------------------------------
# private key file handed to "ssh -i" for every remote call
SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
# user@host of the backup server
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"
# directory on the backup server holding one sub directory per elab member
REMOTE_PATH = "zfs-backups"
# scp style url of the remote backup directory
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"
def call(arguments, as_text=False):
    """ execute an external command and return what it wrote to stdout

    thin convenience layer over subprocess.run(): stdout is captured and a
    failing command is turned into an exception

    :params arguments: list with the program name followed by its parameters
    :params as_text: if trueish, stdout is decoded and returned as a string
    :returns: bytes, or a string if as_text is trueish
    :raises subprocess.CalledProcessError: if the exit value is not 0
    """
    run_options = {
        "check": True,  # a non-zero exit value raises CalledProcessError
        "stdout": subprocess.PIPE,  # capture the output instead of printing
        "universal_newlines": as_text,  # text mode on request
    }
    completed = subprocess.run(arguments, **run_options)
    return completed.stdout
def remote_call(arguments):
    """ run a command on the remote backup server via ssh

    the command is executed as SSH_REMOTE, authenticated with SSH_KEY_FILE

    :params arguments: list with the remote command and its parameters
    :returns: string of the command output
    :raises subprocess.CalledProcessError: if the exit value is not 0
    """
    ssh_prefix = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE]
    return call([*ssh_prefix, *arguments], as_text=True)
def clean_split(text):
    """ splits a text on whitespace and returns only the nonempty items

    e.g. used to turn the line based output of a command into a list

    :params text: string to split
    :returns: list of nonempty items, empty list for blank input
    """
    # str.split() without a separator splits on runs of any whitespace and
    # never yields empty or padded items, so no stripping or filtering is
    # required afterwards
    return text.split()
def list_snapshots():
    """ query zfs for all snapshots that exist in ZFS_POOL

    :returns: list of full snapshot names
    :raises subprocess.CalledProcessError: if the exit value is not 0
    """
    zfs_list = [
        "zfs",
        "list",
        "-t",
        "snapshot",
        "-H",
        "-o",
        "name",
        "-r",
        ZFS_POOL,
    ]
    output = call(zfs_list, as_text=True)
    return clean_split(output)
def list_elab_snapshots():
    """ lists the zfs snapshots for elab file systems

    Only snapshots of datasets whose name starts with ZFS_ELAB_PREFIX are
    considered, everything else in the pool is ignored.

    :returns: dict of snapshot lists with the elab member as key
    :raises subprocess.CalledProcessError: if command has not an exit value of 0
    """
    result = {}
    for snapshot in list_snapshots():
        # a snapshot of the pool's root dataset ("pool@snap") has no "/",
        # partition() copes with that where unpacking a split() would raise
        _pool, separator, snap_name = snapshot.partition("/")
        if not separator or not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        dataset, at_sign, _snap = snap_name.rpartition("@")
        if not at_sign:
            # not a snapshot reference, nothing to back up
            continue
        # cut off only the leading prefix; str.replace() would also remove
        # later occurrences of the prefix inside the member name
        member = dataset[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result
def snapshot_short_name(full_snapshot_name):
    """ strips the pool identification from a full snapshot reference

    e.g. "Datenspeicher/elabfs-LukasMetzler@auto-20190806.0200-1w" would be
    translated to "elabfs-LukasMetzler@auto-20190806.0200-1w"

    :params full_snapshot_name: string of the full snapshot reference
    :returns string: everything after the first "/" of the reference
    :raises ValueError: if the reference does not contain a "/"
    """
    # str.index() raises ValueError if there is no "/" in the reference
    first_slash = full_snapshot_name.index("/")
    return full_snapshot_name[first_slash + 1:]
def backup_filename(current, last=None):
    """ builds the name of the backup file for a snapshot

    full backup (no last snapshot given): "<current>.gz"
    incremental backup: "<last>.to.<current>.gz"
    where <current> and <last> are the short names of the snapshots

    :params current: full reference to the current zfs snapshot
    :params last: (optional) full reference to the last backuped zfs snapshot
    :returns: filename of the backup file
    """
    target = f"{snapshot_short_name(current)}.gz"
    if last is None:
        return target
    return f"{snapshot_short_name(last)}.to.{target}"
def extract_snapshot_name(filename):
    """ returns the target snapshot name from a backup filename

    This is the inverse of backup_filename(), two formats are accepted:

    full backup: "elabfs-...@auto-20190807.0200-1w.gz"
    incremental backup:
    "elabfs-...@auto-20190806.0200-1w.to.elabfs-...@auto-20190807.0200-1w.gz"

    The number of dots in a snapshot name is not restricted, so backups of
    snapshots that do not follow the "auto-<date>.<time>-<ttl>" pattern are
    recognized as well.

    :params filename: name of an backup file
    :returns: short name of the backed up zfs snapshot
    :raises: ValueError if format of filename does not match
    """
    if not filename.endswith(".gz"):
        raise ValueError(f"Not a gzip file: {filename}")
    if not filename.startswith(ZFS_ELAB_PREFIX):
        raise ValueError(f"Not an elabfs snapshot: {filename}")
    snapshot = filename[:-3]
    # one part: full backup, two parts: incremental backup (old, new)
    parts = snapshot.split(".to.")
    # every part must be a snapshot reference, i.e. contain exactly one "@"
    only_snapshots = all(part.count("@") == 1 for part in parts)
    if only_snapshots and len(parts) in (1, 2):
        # the last part is always the snapshot the backup leads to
        return parts[-1]
    raise ValueError(f"Unknown Filename Format: {filename}")
def list_remote_backups(members):
    """ collect the snapshots already backed up on the server, per member

    Only the sub directories of the given elab members are queried, backups
    of other (e.g. former) members on the server are not looked at. If
    listing the sub directory of a member fails, the directory is created
    and the member gets an empty set.

    :params members: list of elab members that have zfs snapshots to backup
    :returns: dict of set with backup entries for each elab member
    :raises subprocess.CalledProcessError: if creating a directory fails
    :raises ValueError: if a remote file name has an unknown format
    """
    result = {}
    for member in members:
        remote_sub_dir = f"{REMOTE_PATH}/{member}"
        try:
            listing = remote_call(["ls", remote_sub_dir])
        except subprocess.CalledProcessError:
            # listing failed, create the sub directory for this member
            remote_call(["mkdir", remote_sub_dir])
            result[member] = set()
        else:
            result[member] = {
                extract_snapshot_name(entry) for entry in clean_split(listing)
            }
    return result
def _send_compressed(send_cmd, target_path):
    """ pipe the output of a "zfs send" command through gzip into a file

    :params send_cmd: list with the complete "zfs send" command line
    :params target_path: path of the gzip file to write
    :raises subprocess.CalledProcessError: if zfs send or gzip fails
    """
    with open(target_path, "wb") as file_handle:
        gzip_process = subprocess.Popen(
            ["gzip"], stdin=subprocess.PIPE, stdout=file_handle
        )
        # leaving the Popen context closes the pipe to gzip, so gzip sees the
        # end of the stream, and then waits until gzip has terminated; the
        # file is therefore complete before it gets closed
        with gzip_process:
            subprocess.run(send_cmd, stdout=gzip_process.stdin, check=True)
    if gzip_process.returncode != 0:
        raise subprocess.CalledProcessError(gzip_process.returncode, ["gzip"])


def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """ backup the latest zfs snapshot for an elab member

    This will try to create an incremental backup but will fall back to a full
    backup if it is not possible. The backup is written as gzip file to
    TMP_BACKUP_FOLDER.

    :params member: name of the elab member
    :params elab_snapshots: list of currently available snapshots for the member
    :params existing_backups: set of available backup names
    :raises subprocess.CalledProcessError: if zfs send or gzip fails
    """
    print(f"backing up member {member}")
    # newest snapshot first, relies on the sortable date in the snapshot name
    snapshots = sorted(elab_snapshots, reverse=True)
    if not snapshots:
        print(" - no snapshots available")
        return
    current_snapshot = snapshots[0]
    # the newest snapshot that is already present in the backups
    latest_backup = next(
        (
            snapshot
            for snapshot in snapshots
            if snapshot_short_name(snapshot) in existing_backups
        ),
        None,
    )
    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    elif latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        gzip_tmp_filename = backup_filename(current_snapshot)
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        gzip_tmp_filename = backup_filename(
            current=current_snapshot, last=latest_backup
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]
    # create the backup
    tmp_gzip_filepath = pathlib.Path(TMP_BACKUP_FOLDER) / gzip_tmp_filename
    print(f" - generating temporary backup file {tmp_gzip_filepath.name}")
    # NOTE(review): the temporary file is neither transferred to the backup
    # server nor removed in this function - confirm where that is done
    _send_compressed(send_cmd, tmp_gzip_filepath)
def create_backups():
    """ back up the latest snapshot of every elab member that has snapshots

    collects the local elab snapshots, asks the backup server which snapshots
    are already backed up and then processes one member after the other
    """
    snapshots_by_member = list_elab_snapshots()
    backups_by_member = list_remote_backups(snapshots_by_member.keys())
    for member in snapshots_by_member:
        backup_latest_snapshot(
            member,
            snapshots_by_member[member],
            backups_by_member.get(member, []),
        )
if __name__ == "__main__":
    # executed as a script (not imported): back up all elab members
    create_backups()