You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
243 lines
8.3 KiB
243 lines
8.3 KiB
import io |
|
import numpy |
|
import pandas |
|
import datetime |
|
|
|
from collections import namedtuple |
|
|
|
from . import utils |
|
|
|
|
|
# Lightweight record types used throughout the log parsing module.
DropStatusInfo = namedtuple("DropStatusInfo", ["when", "status"])
GraphProperties = namedtuple("GraphProperties", ["min", "max", "label"])
GraphSettings = namedtuple("GraphSettings", ["distance", "offset", "volume"])
LogResult = namedtuple("LogResult", ["files", "print", "drops", "statistics"])
Nozzle = namedtuple("Nozzle", ["number", "voltage", "pulse", "drops_failed"])
# typename fixed to match the assigned name (was "Version"), consistent
# with every other declaration above
SoftwareVersion = namedtuple("SoftwareVersion", ["major", "minor", "patch"])
Statistics = namedtuple("Statistics", ["nozzles", "failed_pre_run", "failed_post_run"])
|
|
|
# Graph axis configuration keyed by major software version: version 3 logs
# report drop flight data in pixels, version 10 in m/s and µm.
GRAPH_SETTINGS = {
    3: GraphSettings(
        distance=GraphProperties(min=0, max=400, label="Distance [pixels]"),
        offset=GraphProperties(min=-100, max=100, label="Traverse [pixels]"),
        volume=GraphProperties(min=0, max=600, label="Volume [pl]"),
    ),
    10: GraphSettings(
        distance=GraphProperties(min=0, max=3, label="Speed [m/s]"),
        # label typo fixed: "Deviaton" -> "Deviation"
        offset=GraphProperties(min=-140, max=140, label="Deviation [µm]"),
        volume=GraphProperties(min=0, max=600, label="Volume [pl]"),
    ),
}
|
|
|
|
|
class PrintLog:
    """Parser for one Scienion print log file.

    The constructor only records metadata and resolves the graph settings
    for the software version; call :meth:`parse` with an open file handle
    (positioned after the printer/version header lines) to populate the
    header fields, the source well count and the environment data.
    """

    def __init__(self, log_file, printer, version):
        """Store construction parameters and derive run id / graph settings.

        Parameters:
            log_file: path object of the log file; its stem encodes the run id.
            printer: printer name parsed from the first log line.
            version: SoftwareVersion namedtuple; ``major`` selects GRAPH_SETTINGS.

        Raises:
            ValueError: if the major software version is unknown.
        """
        # construction parameters
        self.log_file = log_file
        self.printer = printer
        self.software_version = version

        # the run id is everything before the last "_" in the file stem
        run_id, _ = log_file.stem.rsplit("_", 1)
        self.run_id = run_id

        try:
            self.graph_settings = GRAPH_SETTINGS[version.major]
        except KeyError as error:
            # chain the original lookup error for easier debugging
            raise ValueError(
                f"Unknown Scienion Software Version {version.major}"
            ) from error

        # common parameters of the print log, filled in by parse_header()
        self.humidity_setting = None
        self.pattern_file = None
        self.print_solutions = None
        self.run_method = None
        self.source_plate = None
        self.target_substrate = None
        self.target_count = None

        # dataframe for humidity and temperature, filled by parse_environment()
        self.environment = None

    def parse(self, filehandle):
        """Parse the log sections in their on-disk order."""
        self.parse_header(filehandle)
        self.parse_source_wells(filehandle)
        self.parse_environment(filehandle)

    def parse_header(self, iterator):
        """Consume "key: value" header lines until the "Field(s):" marker."""
        for line in iterator:
            if line.startswith("Field(s):"):
                break

            parts = line.split(":", 1)
            if len(parts) != 2:
                # not a "key: value" line, skip it
                continue

            key, value = parts[0].strip(), parts[1].strip()
            if key == "Probe":
                self.source_plate = value
            elif key == "Target":
                # e.g. "Substrate Name: t1, t2, t3" -> substrate + target count
                substrate, targets_str = value.split(":")
                self.target_substrate = substrate.strip()
                self.target_count = len(targets_str.split(","))
            elif key.startswith("Pattern File"):
                self.pattern_file = value
            elif key == "Humidity":
                self.humidity_setting = value
            elif key == "Run Name":
                self.run_method = value

    def parse_source_wells(self, iterator):
        """Count the distinct source wells listed in the "Field" section."""
        # first we need to move ahead a little bit
        for line in iterator:
            if line.startswith("Field "):
                break
        raw_wells = []

        for line in iterator:
            if line.startswith("Drops"):
                break
            line = line.strip()
            # skip blank lines, "Field ..." repeats and "[...]" markers
            if line == "" or line[0] in ("F", "["):
                continue
            else:
                raw_wells.extend(line.split("\t"))

        stripped = (entry.strip() for entry in raw_wells)
        wells = (entry for entry in stripped if entry)
        self.print_solutions = len(set(wells))

    def parse_environment(self, iterator):
        """Collect humidity/temperature log lines into ``self.environment``.

        The resulting DataFrame is indexed by timestamp and has the columns
        "humidity" and "temperature".
        """
        buff = io.StringIO()
        for line in iterator:
            if "\tHumidity=\t" in line:
                buff.write(line)
        buff.seek(0)

        # NOTE: the deprecated read_csv(date_parser=...) argument was removed
        # in pandas 2.0; parse the timestamp index explicitly instead.
        tmp_df = pandas.read_csv(buff, sep="\t", header=None, index_col=0)
        tmp_df.index = pandas.to_datetime(
            tmp_df.index, format="%d.%m.%y-%H:%M:%S.%f"
        )
        self.environment = pandas.DataFrame(
            {"humidity": tmp_df.iloc[:, 1], "temperature": tmp_df.iloc[:, 3]}
        )
|
|
|
|
|
def parse_print_log(log_files):
    """Open the main print log and return a populated PrintLog instance."""
    with open(log_files.print, "r", encoding="iso-8859-1") as filehandle:
        # the first line starts with the printer name
        printer = next(filehandle).split()[0]

        # second line looks like "...: major.minor.patch.rest"
        _, version_info = next(filehandle).split(":", 1)
        major, minor, patch, _ = version_info.strip().split(".", 3)
        version = SoftwareVersion(int(major), int(minor), int(patch))

        log_parser = PrintLog(log_files.print, printer, version)
        log_parser.parse(filehandle)
        return log_parser
|
|
|
|
|
def cast(original, to, default=numpy.nan):
    """Convert *original* with the callable *to*, returning *default* on failure.

    String-like inputs are stripped before conversion. Only ``Exception`` is
    caught: the original bare ``except`` would also swallow
    ``KeyboardInterrupt`` and ``SystemExit``.
    """
    if hasattr(original, "strip"):
        original = original.strip()
    try:
        return to(original)
    except Exception:
        return default
|
|
|
|
|
def parse_value(log_line, to, default=numpy.nan):
    """Extract the text after the first "=" in *log_line* and cast it."""
    value = log_line.split("=", 1)[1]
    return cast(value, to, default)
|
|
|
|
|
def parse_file_name(file_path):
    """Derive drop timestamp and status from a drop-check file name."""
    name_parts = [part for part in file_path.stem.split("_") if part]
    *_, date, _unknown, _autodrop, time, info = name_parts
    when = date + time  # parsing datetime is done in the pandas dataframe
    is_ok = info.lower().endswith("ok")
    status = utils.DropState.OK if is_ok else utils.DropState.FAULT
    return DropStatusInfo(when, status)
|
|
|
|
|
def parse_drop_file(file_path):
    """Parse one drop-check file into a flat dict of measurement values.

    The file name encodes timestamp and status; the file body holds well,
    nozzle and (for successful checks only) flight and volume data.
    """
    status_info = parse_file_name(file_path)
    data = {
        "path": file_path,
        "when": status_info.when,
        "status": status_info.status.value,
        "distance": numpy.nan,  # as default value
        "offset": numpy.nan,  # as default value
        "volume": numpy.nan,  # as default value
    }
    # default, so a file without a "Well" line cannot raise a NameError
    # when building the "well_id" entry below
    well_id = ""

    with open(file_path, "r", encoding="iso-8859-1") as filehandle:
        if status_info.status == utils.DropState.OK:
            # only parse distance and offset if it is not a failed check
            next(filehandle)  # ignore first line
            flight_info = next(filehandle)
            distance, offset = flight_info.split()
            data["distance"] = cast(distance, float)
            data["offset"] = cast(offset, float)

        for line in filehandle:
            if line.startswith("Well"):
                # e.g. "1A01": leading digit is the plate, the rest the well
                well_id = parse_value(line, str)
                data["plate"] = cast(well_id[0], int)
                data["well"] = well_id[1:]
            elif line.startswith("Nozzle No"):
                data["nozzle"] = parse_value(line, int)
            elif line.startswith("Nozzle Voltage"):
                data["voltage"] = parse_value(line, int)
            elif line.startswith("Nozzle Pulse"):
                data["pulse"] = parse_value(line, int)
            elif (
                line.startswith("Drop Volume")
                and status_info.status == utils.DropState.OK
            ):
                data["volume"] = parse_value(line, int)

    data["well_id"] = f"{data['nozzle']}.{well_id}"  # nozzle is added for a complete id
    return data
|
|
|
|
|
def parse_drop_logs(log_files):
    """Parse all drop-check files into one DataFrame.

    Each well/nozzle combination is checked once before and once after the
    print run; the chronologically first row per "well_id" is labelled
    "pre run", every other row "post run" (column "measurement").
    """
    collection = (parse_drop_file(f) for f in log_files.drops)
    df = pandas.DataFrame(collection)
    # "when" comes in as a concatenated date+time string from the file name
    df["when"] = pandas.to_datetime(df["when"], format="%Y%m%d%H%M%S")

    # find the pre run values: earliest timestamp per well id
    grouped = df.groupby("well_id")
    pre_run_df = grouped["when"].min().reset_index()
    pre_run_df["measurement"] = "pre run"

    # merge them back into the dataframe; only rows matching a
    # (well_id, minimal when) pair receive the "pre run" label
    df = df.merge(pre_run_df, on=["well_id", "when"], how="outer")

    # the ones with not set values are post runs
    df = df.fillna({"measurement": "post run"})
    return df
|
|
|
|
|
def collect_statistics(drop_log):
    """Build per-nozzle and overall failed-drop statistics from the drop log."""
    # one representative row per nozzle supplies voltage and pulse
    first_per_nozzle = drop_log.groupby("nozzle").first()
    nozzles = [
        Nozzle(
            number,
            row["voltage"],
            row["pulse"],
            utils.find_failed_drops(drop_log, number),
        )
        for number, row in first_per_nozzle.iterrows()
    ]

    overall_failures = utils.find_failed_drops(drop_log, nozzle=None)
    return Statistics(
        nozzles, len(overall_failures.pre_run), len(overall_failures.post_run)
    )
|
|
|
|
|
def parse_logs(log_files):
    """Parse print and drop logs and bundle everything into a LogResult."""
    print_log = parse_print_log(log_files)
    drop_log = parse_drop_logs(log_files)
    statistics = collect_statistics(drop_log)
    return LogResult(
        files=log_files, print=print_log, drops=drop_log, statistics=statistics
    )
|
|
|