You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

243 lines
8.3 KiB

import io
import numpy
import pandas
import datetime
from collections import namedtuple
from . import utils
# (timestamp string, DropState) extracted from a drop file's name
DropStatusInfo = namedtuple("DropStatusInfo", ["when", "status"])
# axis range and label for a single graph
GraphProperties = namedtuple("GraphProperties", ["min", "max", "label"])
# the three graphs (distance, offset, volume) configured per software version
GraphSettings = namedtuple("GraphSettings", ["distance", "offset", "volume"])
# aggregated result of parsing all log files
# NOTE(review): the field name "print" shadows the builtin when accessed as
# an attribute-free name; kept for backward compatibility with callers
LogResult = namedtuple("LogResult", ["files", "print", "drops", "statistics"])
# per-nozzle settings plus its failed-drop information
Nozzle = namedtuple("Nozzle", ["number", "voltage", "pulse", "drops_failed"])
# semantic version of the Scienion printer software
SoftwareVersion = namedtuple("Version", ["major", "minor", "patch"])
# summary over all nozzles and pre-/post-run failures
Statistics = namedtuple("Statistics", ["nozzles", "failed_pre_run", "failed_post_run"])

# graph axis settings keyed by software major version; the measured
# quantities (and their units) changed between version 3 and 10
GRAPH_SETTINGS = {
    3: GraphSettings(
        distance=GraphProperties(min=0, max=400, label="Distance [pixels]"),
        offset=GraphProperties(min=-100, max=100, label="Traverse [pixels]"),
        volume=GraphProperties(min=0, max=600, label="Volume [pl]"),
    ),
    10: GraphSettings(
        distance=GraphProperties(min=0, max=3, label="Speed [m/s]"),
        offset=GraphProperties(min=-140, max=140, label="Deviaton [µm]"),
        volume=GraphProperties(min=0, max=600, label="Volume [pl]"),
    ),
}
class PrintLog:
    """Parser for one Scienion print log file.

    Collects header metadata (source plate, target substrate, pattern
    file, ...), the number of distinct print solutions, and a
    humidity/temperature time series as a pandas DataFrame.
    """

    def __init__(self, log_file, printer, version):
        """Store construction parameters and derive run id and graph settings.

        Raises:
            ValueError: if the software major version has no graph settings.
        """
        # construction parameters
        self.log_file = log_file
        self.printer = printer
        self.software_version = version
        # runid is derived from the filename: the stem without its
        # last "_"-separated suffix
        run_id, _ = log_file.stem.rsplit("_", 1)
        self.run_id = run_id
        try:
            self.graph_settings = GRAPH_SETTINGS[version.major]
        except KeyError:
            # suppress the KeyError context; the ValueError is the real message
            raise ValueError(
                f"Unknown Scienion Software Version {version.major}"
            ) from None
        # common parameters of the print log, filled by parse_header()
        self.humidity_setting = None
        self.pattern_file = None
        self.print_solutions = None
        self.run_method = None
        self.source_plate = None
        self.target_substrate = None
        self.target_count = None
        # dataframe for humidity and temperature, filled by parse_environment()
        self.environment = None

    def parse(self, filehandle):
        """Parse the three sequential sections of the log file in order."""
        self.parse_header(filehandle)
        self.parse_source_wells(filehandle)
        self.parse_environment(filehandle)

    def parse_header(self, iterator):
        """Consume "key: value" header lines up to the "Field(s):" marker."""
        for line in iterator:
            if line.startswith("Field(s):"):
                break
            parts = line.split(":", 1)
            if len(parts) != 2:
                # not a key-value line, skip it
                continue
            key, value = parts[0].strip(), parts[1].strip()
            if key == "Probe":
                self.source_plate = value
            elif key == "Target":
                # value looks like "<substrate>: <t1>,<t2>,..."; the
                # target count is the number of comma separated entries
                substrate, targets_str = value.split(":")
                self.target_substrate = substrate.strip()
                self.target_count = len(targets_str.split(","))
            elif key.startswith("Pattern File"):
                self.pattern_file = value
            elif key == "Humidity":
                self.humidity_setting = value
            elif key == "Run Name":
                self.run_method = value

    def parse_source_wells(self, iterator):
        """Count the distinct source wells used and store the number."""
        # first we need to move ahead a little bit
        for line in iterator:
            if line.startswith("Field "):
                break
        raw_wells = []
        for line in iterator:
            if line.startswith("Drops"):
                break
            line = line.strip()
            # skip blank lines and lines starting with "F" or "[",
            # which are section markers rather than well entries
            if line == "" or line[0] in ("F", "["):
                continue
            else:
                raw_wells.extend(line.split("\t"))
        stripped = (entry.strip() for entry in raw_wells)
        wells = (entry for entry in stripped if entry)
        self.print_solutions = len(set(wells))

    def parse_environment(self, iterator):
        """Collect humidity/temperature readings into `self.environment`."""
        buff = io.StringIO()
        for line in iterator:
            if "\tHumidity=\t" in line:
                buff.write(line)
        buff.seek(0)
        tmp_df = pandas.read_csv(buff, sep="\t", header=None, index_col=0)
        # parse the timestamp index explicitly; the `date_parser` argument
        # used previously is deprecated since pandas 2.0 and later removed
        tmp_df.index = pandas.to_datetime(
            tmp_df.index, format="%d.%m.%y-%H:%M:%S.%f"
        )
        # column 1 holds the humidity value, column 3 the temperature
        # (columns 0 and 2 are the "Humidity="/"Temperature=" labels)
        self.environment = pandas.DataFrame(
            {"humidity": tmp_df.iloc[:, 1], "temperature": tmp_df.iloc[:, 3]}
        )
def parse_print_log(log_files):
    """Parse the print log file referenced by *log_files* into a PrintLog.

    The first line holds the printer name, the second the software
    version; the remainder of the file is handed to PrintLog.parse().
    """
    with open(log_files.print, "r", encoding="iso-8859-1") as filehandle:
        # parse the printer name (first whitespace-separated token)
        printer_line = next(filehandle)
        printer = printer_line.split()[0]
        # get the software version info
        version_line = next(filehandle)
        _, version_info = version_line.split(":", 1)
        # accept "major.minor.patch" with or without a trailing build
        # component; the original unpacking required exactly four parts
        version_parts = version_info.strip().split(".", 3)
        major, minor, patch = version_parts[:3]
        version = SoftwareVersion(int(major), int(minor), int(patch))
        log_parser = PrintLog(log_files.print, printer, version)
        log_parser.parse(filehandle)
    return log_parser
def cast(original, to, default=numpy.nan):
    """Convert *original* with callable *to*, returning *default* on failure.

    String-like values are stripped first.  Only conversion errors
    (ValueError/TypeError) fall back to *default*; a bare ``except:``
    here would also swallow KeyboardInterrupt and SystemExit.
    """
    if hasattr(original, "strip"):
        original = original.strip()
    try:
        return to(original)
    except (ValueError, TypeError):
        return default
def parse_value(log_line, to, default=numpy.nan):
    """Return the text after the first "=" in *log_line*, cast via *to*."""
    value = log_line.split("=", 1)[1]
    return cast(value, to, default)
def parse_file_name(file_path):
    """Derive the drop timestamp and status from a drop file's name.

    The file stem is "_"-separated; the last five fields are
    date, <unknown>, <autodrop>, time and a status suffix that
    ends with "ok" for successful drops.
    """
    fields = [piece for piece in file_path.stem.split("_") if piece]
    date_part, _unknown, _autodrop, time_part, info = fields[-5:]
    # parsing the combined datetime string is done later in the dataframe
    when = date_part + time_part
    is_ok = info.lower().endswith("ok")
    status = utils.DropState.OK if is_ok else utils.DropState.FAULT
    return DropStatusInfo(when, status)
def parse_drop_file(file_path):
    """Parse a single drop inspection file into a flat record dict.

    Distance, offset and volume default to NaN and are only parsed for
    drops whose status (from the file name) is OK.
    """
    status_info = parse_file_name(file_path)
    is_ok = status_info.status == utils.DropState.OK
    data = {
        "path": file_path,
        "when": status_info.when,
        "status": status_info.status.value,
        "distance": numpy.nan,  # as default value
        "offset": numpy.nan,  # as default value
        "volume": numpy.nan,  # as default value
    }
    # fallback so the composite id below cannot raise NameError when the
    # file contains no "Well" line (previously `well_id` was unbound)
    well_id = ""
    with open(file_path, "r", encoding="iso-8859-1") as filehandle:
        if is_ok:
            # only parse distance and offset if it is not a failed check
            next(filehandle)  # ignore first line
            flight_info = next(filehandle)
            distance, offset = flight_info.split()
            data["distance"] = cast(distance, float)
            data["offset"] = cast(offset, float)
        for line in filehandle:
            if line.startswith("Well"):
                well_id = parse_value(line, str)
                # first character is the plate number, the rest the well name
                data["plate"] = cast(well_id[0], int)
                data["well"] = well_id[1:]
            elif line.startswith("Nozzle No"):
                data["nozzle"] = parse_value(line, int)
            elif line.startswith("Nozzle Voltage"):
                data["voltage"] = parse_value(line, int)
            elif line.startswith("Nozzle Pulse"):
                data["pulse"] = parse_value(line, int)
            elif line.startswith("Drop Volume") and is_ok:
                data["volume"] = parse_value(line, int)
    # nozzle is added for a complete id
    # NOTE(review): assumes a "Nozzle No" line is always present — confirm
    data["well_id"] = f"{data['nozzle']}.{well_id}"
    return data
def parse_drop_logs(log_files):
    """Load all drop files into one DataFrame and tag pre-/post-run rows."""
    records = (parse_drop_file(path) for path in log_files.drops)
    frame = pandas.DataFrame(records)
    frame["when"] = pandas.to_datetime(frame["when"], format="%Y%m%d%H%M%S")
    # the earliest timestamp per well marks the pre-run measurement
    earliest = frame.groupby("well_id")["when"].min().reset_index()
    earliest["measurement"] = "pre run"
    # attach the label to the matching rows
    frame = frame.merge(earliest, on=["well_id", "when"], how="outer")
    # rows that did not match an earliest timestamp are post runs
    frame = frame.fillna({"measurement": "post run"})
    return frame
def collect_statistics(drop_log):
    """Summarize per-nozzle settings and the failed-drop counts."""
    # one representative row per nozzle carries its voltage and pulse
    per_nozzle = drop_log.groupby("nozzle").first()
    nozzles = [
        Nozzle(
            number,
            row["voltage"],
            row["pulse"],
            utils.find_failed_drops(drop_log, number),
        )
        for number, row in per_nozzle.iterrows()
    ]
    overall = utils.find_failed_drops(drop_log, nozzle=None)
    return Statistics(nozzles, len(overall.pre_run), len(overall.post_run))
def parse_logs(log_files):
    """Parse print log, drop logs and statistics into one LogResult."""
    parsed_print = parse_print_log(log_files)
    drops = parse_drop_logs(log_files)
    statistics = collect_statistics(drops)
    return LogResult(log_files, parsed_print, drops, statistics)