Automatically create (incremental) backups of ZFS snapshots on a file server.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

160 lines
5.2 KiB

#!/usr/local/bin/python
import pathlib
import subprocess
# Private key file handed to ssh/scp via "-i" for every remote operation.
SSH_KEY_FILE = "/mnt/Datenspeicher/snap-backup-dataset/backup_key"
# user@host of the machine that stores the backups.
SSH_REMOTE = "zfs_snap_backup@etha.cpi.imtek.uni-freiburg.de"
# Remote directory (below "~/" in the scp URLs) holding one sub-directory
# per member.
REMOTE_PATH = "zfs-backups"
# Base scp destination for the backup directory.
# NOTE(review): not referenced anywhere in the visible code.
SCP_REMOTE_URL = f"{SSH_REMOTE}:~/{REMOTE_PATH}/"
# ZFS pool whose snapshots are listed recursively.
ZFS_POOL = "Datenspeicher"
# Dataset-name prefix that marks a snapshot as relevant; whatever follows
# the prefix in the dataset name is used as the member name.
ZFS_ELAB_PREFIX = "elabfs-"
# Local directory where the compressed stream is written before upload.
TMP_BACKUP_FOLDER = "/mnt/Datenspeicher/snap-backup-dataset/temporary-backups"
def call(arguments, as_text=False):
    """Run a command and return everything it wrote to stdout.

    Args:
        arguments: the command line, forwarded unchanged to subprocess.run.
        as_text: when true, stdout is returned as decoded text (str);
            otherwise the raw bytes are returned.

    Returns:
        The captured stdout of the command.

    Raises:
        subprocess.CalledProcessError: if the command exits with a
            non-zero status.
    """
    completed = subprocess.run(
        arguments,
        stdout=subprocess.PIPE,
        universal_newlines=as_text,
        check=True,
    )
    return completed.stdout
def remote_call(arguments):
    """Run a command on the backup host via ssh and return its stdout.

    Args:
        arguments: iterable of command words to execute remotely; they are
            appended to the ssh invocation.

    Returns:
        The remote command's stdout as text.

    Raises:
        subprocess.CalledProcessError: if ssh (or the remote command)
            exits with a non-zero status.
    """
    ssh_command = ["ssh", "-i", SSH_KEY_FILE, SSH_REMOTE, *arguments]
    return call(ssh_command, as_text=True)
def clean_split(text):
    """Split text on whitespace and return the non-empty items as a list.

    Splitting without an explicit separator already collapses runs of
    whitespace, trims both ends and never yields empty strings, so no
    additional stripping or filtering is required.

    Args:
        text: the text to split (e.g. the multi-line output of a command).

    Returns:
        A list of the whitespace-separated items; empty for blank input.
    """
    return text.split()
def list_snapshots():
    """Return the names of all snapshots found below ZFS_POOL.

    Returns:
        A list with one full snapshot name per whitespace-separated item
        of the ``zfs list`` output.

    Raises:
        subprocess.CalledProcessError: if ``zfs list`` fails.
    """
    zfs_list = [
        "zfs", "list",
        "-t", "snapshot",  # snapshots only
        "-H",              # no header line
        "-o", "name",      # print just the name column
        "-r", ZFS_POOL,    # recurse through the pool
    ]
    listing = call(zfs_list, as_text=True)
    return clean_split(listing)
def list_elab_snapshots():
    """Group the snapshots of ZFS_ELAB_PREFIX datasets by member.

    A snapshot is considered when the part after the pool name starts with
    ZFS_ELAB_PREFIX. The member name is the dataset name (everything before
    the last "@") with that leading prefix removed.

    Returns:
        A dict mapping each member name to the list of its full snapshot
        names, in the order reported by list_snapshots().
    """
    result = {}
    for snapshot in list_snapshots():
        # Snapshots of the pool's root dataset ("pool@snap") contain no "/";
        # they cannot belong to a member dataset, so skip them instead of
        # failing on the unpacking.
        _pool, separator, snap_name = snapshot.partition("/")
        if not separator or not snap_name.startswith(ZFS_ELAB_PREFIX):
            continue
        dataset, _snap = snap_name.rsplit("@", 1)
        # Remove only the leading prefix; a member name that happens to
        # contain the prefix text again must stay intact.
        member = dataset[len(ZFS_ELAB_PREFIX):]
        result.setdefault(member, []).append(snapshot)
    return result
def snapshot_short_name(full_snapshot_name):
    """Return a snapshot name without its leading pool component.

    Args:
        full_snapshot_name: name of the form "pool/dataset@snapshot".

    Returns:
        Everything after the first "/" ("dataset@snapshot").

    Raises:
        ValueError: if the name contains no "/".
    """
    _pool, remainder = full_snapshot_name.split("/", 1)
    return remainder
def gzip_filename(current, last=None):
    """Build the file name of the compressed backup stream.

    Args:
        current: full name of the snapshot being backed up.
        last: full name of the snapshot the increment starts from, or
            None for a full backup.

    Returns:
        "<current>.gz" for a full backup and "<last>.to.<current>.gz" for
        an incremental one, both using the short (pool-less) names.
    """
    current_name = snapshot_short_name(current)
    if last is not None:
        return f"{snapshot_short_name(last)}.to.{current_name}.gz"
    return f"{current_name}.gz"
def extract_snapshot_name(filename):
    """Recover the (newest) snapshot name encoded in a backup file name.

    Args:
        filename: backup file name,
            e.g. "elabfs-LukasMetzler@auto-20190807.0200-1w.gz".

    Returns:
        The short snapshot name for a full backup, or the name of the newer
        snapshot for an incremental ("<old>.to.<new>.gz") backup.

    Raises:
        ValueError: if the name does not end in ".gz", does not start with
            ZFS_ELAB_PREFIX, or matches neither of the two known layouts.
    """
    if not filename.endswith(".gz"):
        raise ValueError(f"Not a gzip file: {filename}")
    if not filename.startswith(ZFS_ELAB_PREFIX):
        raise ValueError(f"Not an elabfs snapshot: {filename}")

    stem = filename[: -len(".gz")]
    at_signs = stem.count("@")

    # NOTE(review): both layouts are recognised purely by counting "." and
    # "@", so a name with an additional "." is rejected below.
    if at_signs == 1 and stem.count(".") == 1:
        # full backup: elabfs-...@auto-20190807.0200-1w
        return stem
    if at_signs == 2 and stem.count(".to.") == 1:
        # incremental backup:
        # elabfs-...@auto-20190806.0200-1w.to.elabfs-...@auto-20190807.0200-1w
        _older, newer = stem.split(".to.")
        return newer
    raise ValueError(f"Unknown Filename Format: {filename}")
def list_remote_backups(members):
    """Collect the snapshots that already have a backup on the remote host.

    For every member the remote directory "<REMOTE_PATH>/<member>" is
    listed. When the listing fails, the directory is created and the
    member is reported as having no backups.

    Args:
        members: iterable of member names.

    Returns:
        A dict mapping each member to the set of short snapshot names
        extracted from the backup files found remotely.

    Raises:
        subprocess.CalledProcessError: if creating a missing remote
            directory fails as well (e.g. the host is unreachable).
    """
    result = {}
    for member in members:
        remote_sub_dir = f"{REMOTE_PATH}/{member}"
        try:
            listing = remote_call(["ls", remote_sub_dir])
        except subprocess.CalledProcessError:
            # Listing failed, presumably because the directory does not
            # exist yet. "-p" also creates a missing parent directory and
            # does not fail if the directory is already there.
            remote_call(["mkdir", "-p", remote_sub_dir])
            result[member] = set()
            continue

        snapshots = set()
        for filename in clean_split(listing):
            try:
                snapshots.add(extract_snapshot_name(filename))
            except ValueError as error:
                # A stray file must not abort the backup of every member;
                # report it and carry on. At worst a full backup is made
                # where an incremental one would have been possible.
                print(f" - ignoring remote file for member {member}: {error}")
        result[member] = snapshots
    return result
def _write_gzipped_stream(send_cmd, target_path):
    """Run send_cmd and store its gzip-compressed stdout in target_path."""
    with open(target_path, "wb") as file_handle:
        gzip_process = subprocess.Popen(
            ["gzip"], stdin=subprocess.PIPE, stdout=file_handle
        )
        try:
            subprocess.run(send_cmd, stdout=gzip_process.stdin, check=True)
        finally:
            # gzip only sees end-of-input once our copy of the pipe is
            # closed, and the file is complete only after gzip has exited.
            # Without this the archive could be uploaded while truncated.
            gzip_process.stdin.close()
            gzip_status = gzip_process.wait()
    if gzip_status != 0:
        raise subprocess.CalledProcessError(gzip_status, "gzip")


def backup_latest_snapshot(member, elab_snapshots, existing_backups):
    """Upload a backup of a member's most recent snapshot if it is missing.

    The snapshots are sorted in reverse lexicographic order and the first
    one is treated as the current snapshot (presumably the names carry a
    timestamp so that this is the newest one — verify against the snapshot
    naming scheme). If one of the member's snapshots already has a remote
    backup, an incremental stream from that snapshot to the current one is
    sent; otherwise a full stream is sent. The stream is gzip-compressed
    into TMP_BACKUP_FOLDER, copied to the remote host with scp and the
    temporary file is removed afterwards, also when a step fails.

    Args:
        member: member name; used for the remote sub-directory.
        elab_snapshots: full snapshot names belonging to the member.
        existing_backups: container of short snapshot names that already
            have a remote backup.

    Raises:
        subprocess.CalledProcessError: if zfs send, gzip or scp fails.
    """
    print(f"backing up member {member}")

    snapshots = sorted(elab_snapshots, reverse=True)
    if not snapshots:
        print(" - no snapshots available")
        return

    current_snapshot = snapshots[0]
    # first (i.e. highest-sorting) snapshot that is already backed up
    latest_backup = next(
        (
            snapshot
            for snapshot in snapshots
            if snapshot_short_name(snapshot) in existing_backups
        ),
        None,
    )

    if current_snapshot == latest_backup:
        # nothing to back up
        print(f"- nothing to backup, latest snapshot: {current_snapshot}")
        return
    elif latest_backup is None:
        # no snapshot was found in backups, make a full backup for consistency
        send_cmd = ["zfs", "send", current_snapshot]
        gzip_tmp_filename = gzip_filename(current_snapshot)
        print(f" - full backup, latest snapshot: {current_snapshot}")
    else:
        # make an incremental backup
        print(
            f" - incremental backup, from: {latest_backup} to: {current_snapshot}"
        )
        gzip_tmp_filename = gzip_filename(
            current=current_snapshot, last=latest_backup
        )
        send_cmd = ["zfs", "send", "-I", latest_backup, current_snapshot]

    tmp_gzip_filepath = pathlib.Path(TMP_BACKUP_FOLDER) / gzip_tmp_filename
    try:
        # create the backup
        print(f" - generating temporary backup file {tmp_gzip_filepath.name}")
        _write_gzipped_stream(send_cmd, tmp_gzip_filepath)

        # copy the backup to the remote server
        print(" - copying temporary backup file")
        remote_url = (
            f"{SSH_REMOTE}:~/{REMOTE_PATH}/{member}/{tmp_gzip_filepath.name}"
        )
        copy_cmd = ["scp", "-i", SSH_KEY_FILE, str(tmp_gzip_filepath), remote_url]
        call(copy_cmd)
    finally:
        # remove the temporary file, even after a failure, so that broken
        # or already uploaded archives do not pile up locally
        if tmp_gzip_filepath.exists():
            print(" - removing temporary backup file")
            tmp_gzip_filepath.unlink()
def create_backups():
    """Back up the latest snapshot of every member.

    Gathers the local snapshots per member, looks up which of them already
    exist on the remote host and then processes the members one by one.
    """
    snapshots_by_member = list_elab_snapshots()
    remote_backups = list_remote_backups(snapshots_by_member.keys())
    for member in snapshots_by_member:
        backup_latest_snapshot(
            member,
            snapshots_by_member[member],
            remote_backups.get(member, []),
        )
# Run the backup only when executed as a script, not when imported.
if __name__ == "__main__":
    create_backups()