Browse Source

added documentation to source file

master
Holger Frey 5 years ago
parent
commit
e74b873921
  1. 87
      zfs-snapshot-backup.py

87
zfs-snapshot-backup.py

@ -1,4 +1,8 @@
#!/usr/local/bin/python #!/usr/local/bin/python
# simple python module to create incremental backups of zfs snapshots from
# elab file systems
import pathlib import pathlib
import subprocess import subprocess
@ -15,6 +19,15 @@ TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"
def call(arguments, as_text=False): def call(arguments, as_text=False):
""" run a command line argument
simple wrapper around subprocess.run() with some sensible defaults
:params arguments: list of command line arguments and parameters
:params as_text: should the output be treated as text
:returns: bytes or string (if as_text is truthy)
:raises subprocess.CalledProcessError: if the command exits with a non-zero status
"""
result = subprocess.run( result = subprocess.run(
arguments, arguments,
check=True, check=True,
@ -25,22 +38,44 @@ def call(arguments, as_text=False):
def remote_call(arguments): def remote_call(arguments):
""" runs a command on the remote backup server
:params arguments: list of command line arguments and parameters
:returns: string of the command output
:raises subprocess.CalledProcessError: if the command exits with a non-zero status
"""
cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE] cmd = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE]
cmd.extend(arguments) cmd.extend(arguments)
return call(cmd, as_text=True) return call(cmd, as_text=True)
def clean_split(text): def clean_split(text):
""" splits a text on whitespace, only returns nonempty items as a list
:params text: string to split
:returns: list of nonempty items
"""
items = (item.strip() for item in text.split()) items = (item.strip() for item in text.split())
return [item for item in items if item] return [item for item in items if item]
def list_snapshots(): def list_snapshots():
""" lists the available zfs snapshots for the ZFS_POOL
:returns: list of snapshots
:raises subprocess.CalledProcessError: if the command exits with a non-zero status
"""
cmd = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r", ZFS_POOL] cmd = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-r", ZFS_POOL]
return clean_split(call(cmd, as_text=True)) return clean_split(call(cmd, as_text=True))
def list_elab_snapshots(): def list_elab_snapshots():
""" lists the zfs snapshots for elab file systems
:returns: dict of snapshot lists with the elab member as key
:raises subprocess.CalledProcessError: if the command exits with a non-zero status
"""
result = {} result = {}
for snapshot in list_snapshots(): for snapshot in list_snapshots():
pool, snap_name = snapshot.split("/", 1) pool, snap_name = snapshot.split("/", 1)
@ -54,11 +89,29 @@ def list_elab_snapshots():
def snapshot_short_name(full_snapshot_name): def snapshot_short_name(full_snapshot_name):
""" extracts the short name of a snapshot from the full reference
e.g. "Datenspeicher/elabfs-LukasMetzler@auto-20190806.0200-1w" would be
translated to "elabfs-LukasMetzler@auto-20190806.0200-1w"
:params full_snapshot_name: string of the full snapshot reference
:returns string: snapshot name without pool identification
"""
pool, name = full_snapshot_name.split("/", 1) pool, name = full_snapshot_name.split("/", 1)
return name return name
:
def backup_filename(current, last=None):
""" returns the filename for a full or incremental backup
if only the current snapshot is provided, the filename will be in the
format for a full backup, if the reference for the last backup is provided,
the filename will be in the format of an incremental backup
def gzip_filename(current, last=None): :params current: full reference to the current zfs snapshot
:params last: (optional) full reference to the last backed-up zfs snapshot
:returns: filename of the backup file
"""
current_name = snapshot_short_name(current) current_name = snapshot_short_name(current)
if last is None: if last is None:
return f"{current_name}.gz" return f"{current_name}.gz"
@ -68,7 +121,12 @@ def gzip_filename(current, last=None):
def extract_snapshot_name(filename): def extract_snapshot_name(filename):
# elabfs-LukasMetzler@auto-20190807.0200-1w.gz """ returns the target snapshot name from a backup filename
:params filename: name of a backup file
:returns: short name of the backed up zfs snapshot
:raises: ValueError if format of filename does not match
"""
if not filename.endswith(".gz"): if not filename.endswith(".gz"):
raise ValueError(f"Not a gzip file: {filename}") raise ValueError(f"Not a gzip file: {filename}")
if not filename.startswith(ZFS_ELAB_PREFIX): if not filename.startswith(ZFS_ELAB_PREFIX):
@ -86,6 +144,15 @@ def extract_snapshot_name(filename):
def list_remote_backups(members): def list_remote_backups(members):
""" list the available backup files by elab members
The elab members for whom current zfs snapshots are available must be
supplied, so only these get queried. If backups of snapshots from former
members are still on the backup server, these will be ignored.
:params members: list of elab members that have zfs snapshots to backup
:returns: dict of sets with backup entries for each elab member
"""
result = {} result = {}
for member in members: for member in members:
remote_sub_dir = f"{REMOTE_PATH}/{member}" remote_sub_dir = f"{REMOTE_PATH}/{member}"
@ -99,6 +166,16 @@ def list_remote_backups(members):
def backup_latest_snapshot(member, elab_snapshots, existing_backups): def backup_latest_snapshot(member, elab_snapshots, existing_backups):
""" backup the latest zfs snapshot for an elab member
This will try to create an incremental backup but will fall back to a full
backup if it is not possible.
:params member: name of the elab member
:params elab_snapshots: list of currently available snapshots for the member
:params existing_backups: set of available backup names
"""
print(f"backing up member {member}") print(f"backing up member {member}")
snapshots = sorted(elab_snapshots, reverse=True) snapshots = sorted(elab_snapshots, reverse=True)
current_snapshot = snapshots[0] current_snapshot = snapshots[0]
@ -114,14 +191,14 @@ def backup_latest_snapshot(member, elab_snapshots, existing_backups):
elif latest_backup is None: elif latest_backup is None:
# no snapshot was found in backups, make a full backup for consistency # no snapshot was found in backups, make a full backup for consistency
send_cmd = ["zfs", "send", current_snapshot] send_cmd = ["zfs", "send", current_snapshot]
gzip_tmp_filename = gzip_filename(current_snapshot) gzip_tmp_filename = backup_filename(current_snapshot)
print(f" - full backup, latest snapshot: {current_snapshot}") print(f" - full backup, latest snapshot: {current_snapshot}")
else: else:
# make an incremental backup # make an incremental backup
print( print(
f" - incremental backup, from: {latest_backup} to: {current_snapshot}" f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
) )
gzip_tmp_filename = gzip_filename( gzip_tmp_filename = backup_filename(
current=current_snapshot, last=latest_backup current=current_snapshot, last=latest_backup
) )
send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot] send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]
@ -149,6 +226,7 @@ def backup_latest_snapshot(member, elab_snapshots, existing_backups):
def create_backups(): def create_backups():
""" batch create backups for all available elab snapshots"""
elab_snapshots = list_elab_snapshots() elab_snapshots = list_elab_snapshots()
existing_backups = list_remote_backups(elab_snapshots.keys()) existing_backups = list_remote_backups(elab_snapshots.keys())
for member, snapshots in elab_snapshots.items(): for member, snapshots in elab_snapshots.items():
@ -157,4 +235,5 @@ def create_backups():
if __name__ == "__main__": if __name__ == "__main__":
# run the batch backup function if the file is called
create_backups() create_backups()

Loading…
Cancel
Save