
restructured modules

master
Holger Frey, 6 years ago
parent · commit 59813146e7
9 changed files:

  1. ideas.md (5)
  2. mtor/__init__.py (22)
  3. mtor/commons.py (38)
  4. mtor/dataproc.py (277)
  5. mtor/imageproc.py (112)
  6. mtor/parameters.py (179)
  7. mtor/postproc.py (40)
  8. mtor/prescan.py (161)
  9. mtor/workflows.py (108)

ideas.md

@@ -0,0 +1,5 @@
Further Ideas:
- color the roi
- create a movie
- automatic use of cached values

mtor/__init__.py

@@ -1,21 +1,23 @@
 import warnings

-from .prescan import prescan_workflow
-from .imageproc import image_workflow
-from .dataproc import data_workflow, cached_workflow
-from .filesort import filesort_workflow
+from .workflows import (
+    prescan_workflow,
+    image_workflow,
+    data_workflow,
+    cached_data_workflow,
+    postprocessing_workflow,
+)

 warnings.filterwarnings("ignore")


 def run():
     # settings = prescan_workflow("mtor-bilder", 50, 910, 300, 660)
-    settings = prescan_workflow("mtor-bilder", 50, 725+150, 300, 725)
-    stats_results = image_workflow(settings)
-    selected_files = data_workflow(stats_results, settings)
-    filesort_workflow(selected_files, settings)
+    pw = prescan_workflow("mtor-bilder", 50, 725 + 150, 300, 725)
+    iw = image_workflow(pw.parameters)
+    dw = data_workflow(iw.data, iw.parameters)
+    fw = postprocessing_workflow(dw.data, dw.parameters)  # noqa: F841


 def run_data():
-    selected_files, settings = cached_workflow("mtor-bilder")
-    filesort_workflow(selected_files, settings)
+    cw = cached_data_workflow("mtor-bilder")

mtor/commons.py

@@ -0,0 +1,38 @@
import functools
import numpy
import re

from collections import namedtuple

from .luts import FIRE_LUT  # noqa: F401

IMG_MAX_INTENSITY = 65535  # 2^16 - 1

LABEL_SELECTED = "selected"
LABEL_DISCARDED = "discarded"

OUTPUT_FOLDER_NAMES = (
    "colored",
    "cuts",
    f"cuts_{LABEL_DISCARDED}",
    f"cuts_{LABEL_SELECTED}",
    "data",
)

RE_DIGITS = re.compile(r"(\d+)")

ROI_STATISTIC_FUNCTIONS = {
    "min": numpy.amin,
    "max": numpy.amax,
    "1quantile": functools.partial(numpy.percentile, q=25),
    "2quantile": functools.partial(numpy.percentile, q=50),
    "3quantile": functools.partial(numpy.percentile, q=75),
    "average": numpy.average,
    "median": numpy.median,
    "std": numpy.std,
}

TifImage = namedtuple("TifImage", ["path", "frame"])
TifArray = namedtuple("TifArray", ["path", "frame", "data"])
WorkflowResult = namedtuple("WorkflowResult", ["data", "parameters"])
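ROI_STATISTIC_FUNCTIONS maps each column suffix to the numpy routine that computes it, so a single dict comprehension yields every statistic for an array. A minimal sketch of how the mapping is consumed (the sample array is made up; see get_stats in mtor/imageproc.py for the real call site):

    import numpy
    from mtor.commons import ROI_STATISTIC_FUNCTIONS

    roi = numpy.array([[1, 2, 3], [4, 5, 6]])  # hypothetical ROI pixel values
    stats = {name: func(roi) for name, func in ROI_STATISTIC_FUNCTIONS.items()}
    # {"min": 1, "max": 6, "1quantile": 2.25, ..., "average": 3.5, "median": 3.5, "std": 1.707...}

These keys, prefixed with a group name such as "roi" or "left", become the columns of the pandas data frame built in mtor/dataproc.py.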

mtor/dataproc.py

@@ -6,13 +6,12 @@ import pickle
 import seaborn
 import matplotlib.pyplot as pyplot

-from peakutils.plot import plot as peakplot
 from scipy.signal import savgol_filter

-from .imageproc import STATS_FUNCS, LEFT_GUARD, ROI_NAME, RIGHT_GUARD
+from .commons import ROI_STATISTIC_FUNCTIONS


-def peakplot_iloc(x, y, ind):
+def peakplot_iloc(x, y, ix1, ix1_label="peaks", ix2=None, ix2_label="peaks"):
     """
     Plots the original data with the peaks that were identified

@@ -29,14 +28,25 @@ def peakplot_iloc(x, y, ind):
     ind : array-like
         Indexes of the identified peaks
     """
-    pyplot.plot(x, y, "--")
+    pyplot.plot(x, y)
+    num_items = len(ix1)
     pyplot.plot(
-        x.iloc[ind],
-        y.iloc[ind],
+        x.iloc[ix1],
+        y.iloc[ix1],
         "r+",
         ms=5,
         mew=2,
-        label="{} peaks".format(len(ind)),
+        label=f"{num_items} {ix1_label}",
     )
+    if ix2 is not None:
+        num_items = len(ix2)
+        pyplot.plot(
+            x.iloc[ix2],
+            y.iloc[ix2],
+            "g1",
+            ms=5,
+            mew=2,
+            label=f"{num_items} {ix2_label}",
+        )
     pyplot.legend()

@@ -56,26 +66,31 @@ def set_plotting_styles():
     seaborn.set_context("talk")


-def init_pandas_data_frame():
+def init_pandas_data_frame(parameters):
     columns = ["file", "frame"]
-    for prefix in (LEFT_GUARD, ROI_NAME, RIGHT_GUARD):
-        for key in STATS_FUNCS:
-            columns.append(f"{prefix}.{key}")
+    groups = (
+        parameters.roi_name,
+        parameters.left_guard_name,
+        parameters.right_guard_name,
+    )
+    for group in groups:
+        for func_name in ROI_STATISTIC_FUNCTIONS:
+            columns.append(f"{group}.{func_name}")
     return pandas.DataFrame(columns=columns)


-def construct_data_frame(stats_results):
-    data_frame = init_pandas_data_frame()
+def construct_data_frame(stats_results, parameters):
+    data_frame = init_pandas_data_frame(parameters)
     data_frame = data_frame.append(stats_results, ignore_index=True)
     return data_frame.sort_values(by=["frame"]).reset_index(drop=True)


-def find_guard_value(data_frame, settings):
-    left_avg = data_frame["left.average"]
-    right_avg = data_frame["right.average"]
-    guards_avg = left_avg.append(right_avg, ignore_index=True)
+def find_guard_threshold(data_frame, parameters):
+    left_values = data_frame[parameters.left_guard_column]
+    right_values = data_frame[parameters.right_guard_column]
+    guard_values = left_values.append(right_values, ignore_index=True)
     guard_data = numpy.histogram(
-        guards_avg, bins=settings.guard_histogram_bins
+        guard_values, bins=parameters.guard_histogram_bins
     )
     guard_counts = guard_data[0].astype(numpy.float16)
     guard_edges = guard_data[1][1:]  # edges enclose the counts

@@ -85,13 +100,15 @@ def find_guard_value(data_frame, settings):
     pyplot.title("Histogram of Guard Avarages (not filtered)")
     pyplot.xlabel("Average Intensity [au]")
     pyplot.ylabel("Number of Observations [1]")
-    path = settings.data_dir / "1-histogram-of-guard-avarages-not-filtered.png"
+    path = (
+        parameters.data_dir / "1-histogram-of-guard-avarages-not-filtered.png"
+    )
     pyplot.savefig(str(path))

     guard_counts_filtered = savgol_filter(
         guard_counts,
-        settings.guard_filter_window,
-        settings.guard_filter_polynom,
+        parameters.guard_filter_window,
+        parameters.guard_filter_polynom,
     )
     pyplot.clf()

@@ -99,7 +116,7 @@ def find_guard_value(data_frame, settings):
     pyplot.title("Histogram of Guard Avarages (Savitzky-Golay filter)")
     pyplot.xlabel("Average Intensity [au]")
     pyplot.ylabel("Number of Observations [1]")
-    path = settings.data_dir / "2-histogram-of-guard-avarages-filtered.png"
+    path = parameters.data_dir / "2-histogram-of-guard-avarages-filtered.png"
     pyplot.savefig(str(path))

     # Finding the first minima after the first peak

@@ -114,47 +131,53 @@ def find_guard_value(data_frame, settings):
     inverted_series = max(guard_counts_filtered) - guard_counts_filtered
     indexes = peakutils.indexes(
         inverted_series[first_peak_position:],
-        min_dist=settings.guards_minima_min_dist,
+        min_dist=parameters.guards_minima_min_dist,
     )
     # since we shortened the data, we need to add the first peak position
     first_minima_position = indexes[0] + first_peak_position
-    settings.guard_max_value = guard_edges[first_minima_position]
+    parameters.guard_max_value = guard_edges[first_minima_position]

     pyplot.clf()
-    peakplot(guard_edges, guard_counts_filtered, [first_minima_position])
+    peakplot_iloc(
+        pandas.Series(guard_edges), pandas.Series(guard_counts_filtered), [first_minima_position], "minima"
+    )
     pyplot.title(
         (
             f"Histogram of Guard Avarages (Savitzky-Golay filter),"
-            f" first minima at {int(settings.guard_max_value)} au"
+            f" first minima at {int(parameters.guard_max_value)} au"
         )
     )
     pyplot.xlabel("Average Intensity [au]")
     pyplot.ylabel("Number of Observations [1]")
     path = (
-        settings.data_dir
+        parameters.data_dir
        / "3-histogram-of-guard-avarages-filtered-with-first-minima.png"
     )
     pyplot.savefig(str(path))


-def check_guards(data_frame, settings):
-    for what in (LEFT_GUARD, RIGHT_GUARD):
+def check_guards(data_frame, parameters):
+    for what in (parameters.left_guard_name, parameters.right_guard_name):
         ok_col = f"{what}.ok"
         data_frame[ok_col] = False
-        mask = data_frame[f"{what}.average"] < settings.guard_max_value
+        mask = (
+            data_frame[f"{what}.{parameters.guard_stats}"]
+            < parameters.guard_max_value
+        )
         data_frame.loc[mask, ok_col] = True
     data_frame["guards.ok"] = (
-        data_frame[f"{LEFT_GUARD}.ok"] & data_frame[f"{RIGHT_GUARD}.ok"]
+        data_frame[f"{parameters.left_guard_name}.ok"]
+        & data_frame[f"{parameters.right_guard_name}.ok"]
     )
-    mask = data_frame["guards.ok"] == True
+    mask = data_frame["guards.ok"] == True  # noqa: E712
     guarded_df = data_frame[mask].copy()

     pyplot.clf()
     ax = seaborn.scatterplot(
         x="frame",
-        y=f"{ROI_NAME}.average",
+        y=parameters.roi_column,
         data=data_frame,
         hue="guards.ok",
         hue_order=[True, False],

@@ -163,13 +186,13 @@ def check_guards(data_frame, settings):
     pyplot.title("Selection based on guard values")
     pyplot.ylabel("Average Intensity [au]")
     pyplot.ylabel("Frame Number [1]")
-    path = settings.data_dir / "4-image-selection-based-on-guard-values.png"
+    path = parameters.data_dir / "4-image-selection-based-on-guard-values.png"
     pyplot.savefig(str(path))
-    settings.charts_y_limit = ax.get_ylim()
+    parameters.charts_y_limit = ax.get_ylim()

     pyplot.clf()
     ax = seaborn.scatterplot(
-        x="frame", y=f"{ROI_NAME}.average", data=guarded_df
+        x="frame", y=parameters.roi_column, data=guarded_df
     )
     count_all_images = len(data_frame)
     count_guarded_images = len(guarded_df)

@@ -181,81 +204,80 @@ def check_guards(data_frame, settings):
     )
     pyplot.xlabel("Frame Number [1]")
     pyplot.ylabel("Average Intensity [au]")
-    ax.set_ylim(settings.charts_y_limit)
-    path = settings.data_dir / "5-selected-values-based-on-guard-values.png"
+    ax.set_ylim(parameters.charts_y_limit)
+    path = parameters.data_dir / "5-selected-values-based-on-guard-values.png"
     pyplot.savefig(str(path))
     return data_frame


-def find_outliers(data_frame, settings):
-    mask = data_frame["guards.ok"] == True
+def find_outliers(data_frame, parameters):
+    mask = data_frame["guards.ok"] == True  # noqa: E712
     guarded_df = data_frame[mask].copy()

     pyplot.clf()
-    seaborn.boxplot(data=guarded_df, x=f"{ROI_NAME}.average")
+    seaborn.boxplot(data=guarded_df, x=parameters.roi_column)
     pyplot.title(f"Boxblot of guarded values")
     pyplot.xlabel("Average Intensity [au]")
-    path = settings.data_dir / "6-boxplot-of-guarded-values.png"
+    path = parameters.data_dir / "6-boxplot-of-guarded-values.png"
     pyplot.savefig(str(path))

-    lower_quartil = guarded_df[f"{ROI_NAME}.average"].quantile(0.25)
-    upper_quartil = guarded_df[f"{ROI_NAME}.average"].quantile(0.75)
+    lower_quartil = guarded_df[parameters.roi_column].quantile(0.25)
+    upper_quartil = guarded_df[parameters.roi_column].quantile(0.75)
     inter_quartil_range = upper_quartil - lower_quartil
-    settings.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
+    parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
     data_frame["outlier.ok"] = (
-        data_frame[f"{ROI_NAME}.average"] < settings.outlier_upper_limit
+        data_frame[parameters.roi_column] < parameters.outlier_upper_limit
     )
     return data_frame


-def select_value_on_guards_and_outliers(data_frame, settings):
+def select_on_guards_and_outliers(data_frame, parameters):
     data_frame["outlier_guards.ok"] = (
         data_frame["guards.ok"] & data_frame["outlier.ok"]
     )
-    mask = data_frame["outlier_guards.ok"] == True
+    mask = data_frame["outlier_guards.ok"] == True  # noqa: E712
     selected_df = data_frame[mask].copy()

     pyplot.clf()
     ax = seaborn.scatterplot(
-        x="frame", y=f"{ROI_NAME}.average", data=selected_df
+        x="frame", y=parameters.roi_column, data=selected_df
     )
     pyplot.title(f"Selected Images, outliers removed")
     pyplot.xlabel("Frame Number [1]")
     pyplot.ylabel("Average Intensity [au]")
-    ax.set_ylim(settings.charts_y_limit)
-    path = settings.data_dir / "7-selected-images-outliers-removed.png"
+    ax.set_ylim(parameters.charts_y_limit)
+    path = parameters.data_dir / "7-selected-images-outliers-removed.png"
     pyplot.savefig(str(path))
     return selected_df


-def smooth_rolling_min(selected_df, settings):
+def smooth_rolling_min(selected_df, parameters):
     rm = (
-        selected_df[f"{ROI_NAME}.average"]
-        .rolling(settings.rolling_min_window)
+        selected_df[parameters.roi_column]
+        .rolling(parameters.rolling_min_window)
         .min()
     )
     # after a rolling window calculation, the first values will be NaN,
     # we need to fill them
-    selected_df[f"{ROI_NAME}.average.rolling.min"] = rm.fillna(
+    selected_df[f"{parameters.roi_name}.rolling.min"] = rm.fillna(
         method="backfill"
     )

     pyplot.clf()
     ax = seaborn.scatterplot(
-        x="frame", y=f"{ROI_NAME}.average.rolling.min", data=selected_df
+        x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df
     )
     pyplot.title(f"Selected Images, outliers removed, rolling min applied")
     pyplot.xlabel("Frame Number [1]")
     pyplot.ylabel("Average Intensity [au]")
-    ax.set_ylim(settings.charts_y_limit)
+    ax.set_ylim(parameters.charts_y_limit)
     path = (
-        settings.data_dir
+        parameters.data_dir
         / "8-selected-images-outliers-removed-rolling-min-applied.png"
     )
     pyplot.savefig(str(path))

@@ -263,17 +285,17 @@ def smooth_rolling_min(selected_df, settings):
     return selected_df


-def smooth_savgol_filter(selected_df, settings):
+def smooth_savgol_filter(selected_df, parameters):
     filtered = savgol_filter(
-        selected_df[f"{ROI_NAME}.average.rolling.min"],
-        settings.savgol_filter_window,
-        settings.savgol_filter_polynom,
+        selected_df[f"{parameters.roi_name}.rolling.min"],
+        parameters.savgol_filter_window,
+        parameters.savgol_filter_polynom,
     )
-    selected_df[f"{ROI_NAME}.average.savgol"] = filtered
+    selected_df[f"{parameters.roi_name}.savgol"] = filtered

     pyplot.clf()
     seaborn.lineplot(
-        x="frame", y=f"{ROI_NAME}.average.savgol", data=selected_df
+        x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df
     )
     pyplot.title(
         (

@@ -284,7 +306,7 @@ def smooth_savgol_filter(selected_df, settings):
     pyplot.xlabel("Frame Number [1]")
     pyplot.ylabel("Average Intensity [au]")
     path = (
-        settings.data_dir
+        parameters.data_dir
         / "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png"
     )
     pyplot.savefig(str(path))

@@ -292,46 +314,50 @@ def smooth_savgol_filter(selected_df, settings):
     return selected_df


-def find_extremas(selected_df, settings):
-    indexes = peakutils.indexes(
-        selected_df[f"{ROI_NAME}.average.savgol"],
-        thres=settings.peak_threshold,
-        min_dist=settings.peak_min_distance,
+def find_extremas(selected_df, parameters):
+    max_indexes = peakutils.indexes(
+        selected_df[f"{parameters.roi_name}.savgol"],
+        thres=parameters.peak_threshold,
+        min_dist=parameters.peak_min_distance,
     )
-    maximas = selected_df.iloc[indexes].copy()
+    maximas = selected_df.iloc[max_indexes].copy()

     pyplot.clf()
     peakplot_iloc(
         selected_df["frame"],
-        selected_df[f"{ROI_NAME}.average.savgol"],
-        indexes,
+        selected_df[f"{parameters.roi_name}.savgol"],
+        max_indexes,
+        "maxima",
     )
     pyplot.title(f"Finding Maximas")
     pyplot.xlabel("Frame Number [1]")
     pyplot.ylabel("Average Intensity [au]")
-    path = settings.data_dir / "10-finding-maximas.png"
+    path = parameters.data_dir / "10-finding-maximas.png"
     pyplot.savefig(str(path))

     inverted_series = (
-        max(selected_df[f"{ROI_NAME}.average.savgol"])
-        - selected_df[f"{ROI_NAME}.average.savgol"]
+        max(selected_df[f"{parameters.roi_name}.savgol"])
+        - selected_df[f"{parameters.roi_name}.savgol"]
     )
-    indexes = peakutils.indexes(
-        inverted_series, min_dist=settings.peak_min_distance
+    min_indexes = peakutils.indexes(
+        inverted_series, min_dist=parameters.peak_min_distance
     )
-    minimas = selected_df.iloc[indexes].copy()
+    minimas = selected_df.iloc[min_indexes].copy()

     pyplot.clf()
     peakplot_iloc(
         selected_df["frame"],
-        selected_df[f"{ROI_NAME}.average.savgol"],
-        indexes,
+        selected_df[f"{parameters.roi_name}.savgol"],
+        max_indexes,
+        "maxima",
+        min_indexes,
+        "minima",
     )
     pyplot.title(f"Finding Minimas")
     pyplot.xlabel("Frame Number [1]")
     pyplot.ylabel("Average Intensity [au]")
-    path = settings.data_dir / "11-finding-minimas.png"
+    path = parameters.data_dir / "11-finding-minimas.png"
     pyplot.savefig(str(path))

     maximas["is_maxima"] = True

@@ -340,91 +366,38 @@ def find_extremas(selected_df, settings):
     return extremas_df


-def save_data(data_frame, selected_df, extremas_df, settings):
-    path = settings.data_dir / "numeric-data.xlsx"
+def save_data(data_frame, selected_df, extremas_df, parameters):
+    path = parameters.data_dir / "numeric-data.xlsx"
     writer = pandas.ExcelWriter(path, engine="xlsxwriter")
     extremas_df.to_excel(writer, sheet_name="extremas")
     selected_df.to_excel(writer, sheet_name="selected data")
     data_frame.to_excel(writer, sheet_name="raw data")
-    ignore_settings = {"tif_list", "cuts_dir"}
-    tmp_settings = {
-        k: [v] for k, v in settings.items() if k not in ignore_settings
+    ignore_parameters = {"tif_list", "cuts_dir"}
+    tmp_parameters = {
+        k: [v] for k, v in parameters.items() if k not in ignore_parameters
     }
-    tmp_setings_df = pandas.DataFrame(tmp_settings).T
+    tmp_setings_df = pandas.DataFrame(tmp_parameters).T
     tmp_setings_df.to_excel(writer, sheet_name="parameters")
     writer.save()


-def data_workflow(stats_results, settings):
-    print("3/4: Data analysis")
-    set_plotting_styles()
-    data_frame = construct_data_frame(stats_results)
-    save_temp(data_frame, settings)
-    find_guard_value(data_frame, settings)
-    data_frame = check_guards(data_frame, settings)
-    data_frame = find_outliers(data_frame, settings)
-    selected_df = select_value_on_guards_and_outliers(data_frame, settings)
-    selected_df = smooth_rolling_min(selected_df, settings)
-    selected_df = smooth_savgol_filter(selected_df, settings)
-    extremas_df = find_extremas(selected_df, settings)
-    save_data(data_frame, selected_df, extremas_df, settings)
-    return list(selected_df["file"])
-
-
-def save_temp(data_frame, settings):
-    csv_path = settings.tif_dir / "_data.csv"
+def save_temp(data_frame, parameters):
+    csv_path = parameters.tif_dir / "_data.csv"
     data_frame.to_csv(csv_path, sep="\t")
-    xls_path = settings.tif_dir / "_data.xlsx"
-    data_frame.to_excel(xls_path)
-    settings_path = settings.tif_dir / "_settings.pickle"
-    with open(settings_path, "wb") as fh:
-        pickle.dump(settings, fh)
+    parameters_path = parameters.tif_dir / "_parameters.pickle"
+    with open(parameters_path, "wb") as fh:
+        pickle.dump(parameters, fh)


 def load_temp(folder):
     dir_path = pathlib.Path(folder)
     csv_path = dir_path / "_data.csv"
     data_frame = pandas.read_csv(csv_path, sep="\t", index_col=0)
-    settings_path = dir_path / "_settings.pickle"
-    with open(settings_path, "rb") as fh:
-        settings = pickle.load(fh)
-    settings.cut_pad_x = 50
-    settings.cut_pad_y = 25
-    settings.guard_histogram_bins = 100
-    settings.guard_filter_window = 7
-    settings.guard_filter_polynom = 3
-    settings.guards_minima_min_dist = 10
-    settings.rolling_min_window = 5
-    settings.savgol_filter_window = 51
-    settings.savgol_filter_polynom = 1
-    settings.peak_min_distance = 5
-    settings.peak_threshold = 0.3
-    return data_frame, settings
-
-
-def cached_workflow(folder):
-    print("3/4: Data analysis")
-    set_plotting_styles()
-    data_frame, settings = load_temp(folder)
-    find_guard_value(data_frame, settings)
-    data_frame = check_guards(data_frame, settings)
-    data_frame = find_outliers(data_frame, settings)
-    selected_df = select_value_on_guards_and_outliers(data_frame, settings)
-    selected_df = smooth_rolling_min(selected_df, settings)
-    selected_df = smooth_savgol_filter(selected_df, settings)
-    extremas_df = find_extremas(selected_df, settings)
-    save_data(data_frame, selected_df, extremas_df, settings)
-    return list(selected_df["file"]), settings
+    parameters_path = dir_path / "_parameters.pickle"
+    with open(parameters_path, "rb") as fh:
+        parameters = pickle.load(fh)
+    return data_frame, parameters

mtor/imageproc.py

@@ -1,32 +1,13 @@
-from collections import namedtuple
-from functools import partial
-from multiprocessing import Pool
 from PIL import Image
-from tqdm import tqdm
 import numpy

-from .luts import FIRE_LUT
+from .commons import (
+    TifArray,
+    IMG_MAX_INTENSITY,
+    FIRE_LUT,
+    ROI_STATISTIC_FUNCTIONS,
+)

-STATS_FUNCS = {
-    "min": numpy.amin,
-    "max": numpy.amax,
-    "1quantile": partial(numpy.percentile, q=25),
-    "2quantile": partial(numpy.percentile, q=50),
-    "3quantile": partial(numpy.percentile, q=75),
-    "average": numpy.average,
-    "median": numpy.median,
-    "std": numpy.std,
-}
-LEFT_GUARD = "left"
-ROI_NAME = "roi"
-RIGHT_GUARD = "right"
-ROI_BORDER_INTENSITY = 2**15
-TifArray = namedtuple("TifArray", ["path", "frame", "data"])


 def open_as_array(tif_image):

@@ -38,19 +19,19 @@ def open_as_array(tif_image):
     )


-def scale_and_colorize(tif_array, settings):
-    adjusted_array = (tif_array.data - settings.offset) // settings.scale
+def scale_and_colorize(tif_array, parameters):
+    adjusted_array = (tif_array.data - parameters.offset) // parameters.scale
     # paint roi
-    top = settings.roi_top
-    bottom = settings.roi_bottom
-    right = settings.roi_right
-    left = settings.roi_left
-    adjusted_array[top, left:right] = ROI_BORDER_INTENSITY
-    adjusted_array[bottom, left:right] = ROI_BORDER_INTENSITY
-    adjusted_array[top:bottom, left] = ROI_BORDER_INTENSITY
-    adjusted_array[top:bottom, right] = ROI_BORDER_INTENSITY
+    top = parameters.roi_top
+    bottom = parameters.roi_bottom
+    right = parameters.roi_right
+    left = parameters.roi_left
+    adjusted_array[top, left:right] = IMG_MAX_INTENSITY
+    adjusted_array[bottom, left:right] = IMG_MAX_INTENSITY
+    adjusted_array[top:bottom, left] = IMG_MAX_INTENSITY
+    adjusted_array[top:bottom, right] = IMG_MAX_INTENSITY
     adjusted = Image.fromarray(adjusted_array)
     converted = adjusted.convert(mode="L").convert(mode="P")

@@ -58,21 +39,21 @@ def scale_and_colorize(tif_array, settings):
     return converted.convert("RGB")


-def safe_color_coded(tif_array, settings):
-    png_image = scale_and_colorize(tif_array, settings)
-    png_path = settings.colored_dir / (tif_array.path.stem + ".jpg")
+def safe_color_coded(tif_array, parameters):
+    png_image = scale_and_colorize(tif_array, parameters)
+    png_path = parameters.colored_dir / (tif_array.path.stem + ".jpg")
     with png_path.open("wb") as outhandle:
         png_image.save(outhandle)
     del png_image


-def safe_cut(tif_array, settings):
-    cut_path = settings.cuts_dir / (tif_array.path.stem + "_cut.tif")
-    top = settings.cut_top
-    bottom = settings.cut_bottom
-    right = settings.cut_right
-    left = settings.cut_left
+def safe_cut(tif_array, parameters):
+    cut_path = parameters.cuts_dir / tif_array.path.name
+    top = parameters.cut_top
+    bottom = parameters.cut_bottom
+    right = parameters.cut_right
+    left = parameters.cut_left
     with cut_path.open("wb") as outhandle:
         cut_array = tif_array.data[top:bottom, left:right]

@@ -81,42 +62,35 @@ def safe_cut(tif_array, settings):
     del cut_img


-def get_roi(tif_array, settings):
-    top = settings.roi_top
-    bottom = settings.roi_bottom
-    right = settings.roi_right
-    left = settings.roi_left
+def get_roi(tif_array, parameters):
+    top = parameters.roi_top
+    bottom = parameters.roi_bottom
+    right = parameters.roi_right
+    left = parameters.roi_left
     return tif_array.data[top:bottom, left:right]


-def analyse_roi(tif_array, settings):
-    data = get_roi(tif_array, settings)
+def analyse_roi(tif_array, parameters):
+    data = get_roi(tif_array, parameters)
     results = {}
-    results.update(get_stats(data[:, 1], LEFT_GUARD))
-    results.update(get_stats(data[:, 1:-1], ROI_NAME))
-    results.update(get_stats(data[:, -1], RIGHT_GUARD))
+    results.update(get_stats(data[:, 1], parameters.left_guard_name))
+    results.update(get_stats(data[:, 1:-1], parameters.roi_name))
+    results.update(get_stats(data[:, -1], parameters.right_guard_name))
     return results


 def get_stats(array, prefix):
-    return {f"{prefix}.{k}": f(array) for k, f in STATS_FUNCS.items()}
+    return {
+        f"{prefix}.{name}": func(array)
+        for name, func in ROI_STATISTIC_FUNCTIONS.items()
+    }


-def process_one_tif(tif_image, settings):
+def process_one_tif(tif_image, parameters):
     tif_array = open_as_array(tif_image)
-    safe_color_coded(tif_array, settings)
-    safe_cut(tif_array, settings)
-    stats_result = analyse_roi(tif_array, settings)
+    safe_color_coded(tif_array, parameters)
+    safe_cut(tif_array, parameters)
+    stats_result = analyse_roi(tif_array, parameters)
     stats_result["file"] = tif_image.path.name
     stats_result["frame"] = tif_image.frame
     return stats_result


-def image_workflow(settings):
-    print("2/4: Image analysis and conversion")
-    func = partial(process_one_tif, settings=settings)
-    tif_list = settings.tif_list
-    num_items = len(tif_list)
-    with Pool(4) as p:
-        stat_results = list(tqdm(p.imap(func, tif_list), total=num_items))
-    return stat_results

mtor/parameters.py

@@ -0,0 +1,179 @@
import pathlib

from PIL import Image

from .commons import RE_DIGITS, OUTPUT_FOLDER_NAMES, TifImage


class Parameters(dict):
    """ namespacing the parameters for an analysis with nice access

    the following parameters are used:

    boost: linear factor for brightness on color coded images
    charts_y_limit: common scale for the y axis in graphs
    colored_dir: path to the directory to hold color coded images
    cut_bottom: bottom edge for cropping an image
    cut_left: left edge for cropping an image
    cut_pad_x: pixels added to left or right edge to crop an image
    cut_pad_y: pixels added to top or bottom edge to crop an image
    cut_right: right edge for cropping an image
    cut_top: top edge for cropping an image
    cuts_discarded_dir: path to the directory of discarded cropped images
    cuts_selected_dir: path to the directory of selected cropped images
    data_dir: path to the directory for graphs and numeric data
    folder: name of the directory that holds the tif images
    guard_filter_polynom: polynomial scalar for smoothing the guards histogram
    guard_filter_window: window scalar for smoothing the guards histogram
    guard_histogram_bins: number of bins when constructing guards histogram
    guard_max_value: calculated threshold for the guards
    guard_stats: which values to use for calculation of guards
    guards_minima_min_dist: min distance between minimas in guards histogram
    image_height: height of an image
    image_width: width of an image
    intensity_max: highest intensity found in all images
    intensity_min: lowest intensity found in all images
    left_guard_column: name of the most used left guard value column
    left_guard_name: left guard value group name
    offset: calculated lowest intensity, subtracted before scaling
    outlier_upper_limit: calculated threshold value for outliers
    peak_min_distance: min distance for peaks in the final results
    peak_threshold: threshold peak height in the final results
    right_guard_column: name of the most used right guard value column
    right_guard_name: right guard value group name
    roi_bottom: bottom edge of the ROI
    roi_column: name of the most used roi value column
    roi_left: left edge of the ROI
    roi_name: roi value group name
    roi_right: right edge of the ROI
    roi_stats: which values to use for calculation of roi
    roi_top: top edge of the ROI
    rolling_min_window: size of the window for the rolling min calculation
    savgol_filter_polynom: polynomial scalar for smoothing the final results
    savgol_filter_window: window scalar for smoothing the final results
    scale: calculated factor to boost image intensities
    tif_dir: path to the directory with the original tif images
    """

    def __init__(self, folder, top, right, bottom, left, **kargs):
        super().__init__(self)
        self.folder = folder

        # defaults
        self.boost = 10
        self.cut_pad_x = 50
        self.cut_pad_y = 25
        self.guard_histogram_bins = 100
        self.guard_filter_window = 7
        self.guard_filter_polynom = 3
        self.guards_minima_min_dist = 10
        self.rolling_min_window = 5
        self.savgol_filter_window = 51
        self.savgol_filter_polynom = 1
        self.peak_min_distance = 5
        self.peak_threshold = 0.3

        # labels for data items
        self.roi_name = "roi"
        self.roi_stats = "average"
        self.roi_column = f"{self.roi_name}.{self.roi_stats}"
        self.left_guard_name = "left"
        self.right_guard_name = "right"
        self.guard_stats = "average"
        self.left_guard_column = f"{self.left_guard_name}.{self.guard_stats}"
        self.right_guard_column = f"{self.right_guard_name}.{self.guard_stats}"

        # arbitrary keyword arguments, updating defaults if necessary
        self.update(kargs)

        # create list of images
        self.tif_dir = pathlib.Path(folder)
        if not self.tif_dir.is_dir():
            raise IOError(f"Not a directory: {self.folder}")
        self.tif_list = self.scan_image_dir()

        # set the names of output directories
        for name in OUTPUT_FOLDER_NAMES:
            sub_path = self.tif_dir / name
            self[f"{name}_dir"] = sub_path

        # get image dimensions
        width, height = self.get_size_of_one_image()
        self.image_width = width
        self.image_height = height

        self.set_roi_and_cut_size(top, right, bottom, left)

    def __getattr__(self, key):
        """ return a parameters value as instance property """
        return self[key]

    def __setattr__(self, key, value):
        """ set a parameters value via instance property """
        self[key] = value

    def __getstate__(self):
        # Copy the object's state from self.__dict__ which contains
        # all our instance attributes. Always use the dict.copy()
        # method to avoid modifying the original state.
        return self.__dict__.copy()

    def __setstate__(self, state):
        # Restore instance attributes (i.e., filename and lineno).
        self.__dict__.update(state)

    def scan_image_dir(self):
        """ scan a folder for tif images """
        visible = (
            item
            for item in self.tif_dir.iterdir()
            if not item.stem.startswith(".")
        )
        tifs = (item for item in visible if item.suffix == ".tif")
        results = []
        for path in tifs:
            try:
                match = RE_DIGITS.search(path.name).group()
            except AttributeError as ex:
                raise ValueError(
                    f"no sequence number found in {path.name}"
                ) from ex
            results.append(TifImage(path=path, frame=int(match)))
        return results

    def get_size_of_one_image(self):
        """ get the dimension of one Image

        the dimensions should be the same on all images. this is ensured in
        another workflow.
        """
        image = self.tif_list[0]
        with open(image.path, "rb") as filehandle:
            image = Image.open(filehandle)
            size = image.size
        return size

    def set_roi_and_cut_size(self, top, right, bottom, left):
        if right < left:
            left, right = right, left
        if top > bottom:
            top, bottom = bottom, top
        if (
            top < 0
            or left < 0
            or bottom > self.image_height
            or right > self.image_width
        ):
            raise ValueError("ROI out of bounce")
        self.roi_top = top
        self.roi_bottom = bottom
        self.roi_right = right
        self.roi_left = left
        self.cut_top = min(self.image_height, top - self.cut_pad_y)
        self.cut_bottom = max(0, bottom + self.cut_pad_y)
        self.cut_right = min(self.image_width, right + self.cut_pad_x)
        self.cut_left = max(0, left - self.cut_pad_x)
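Because Parameters subclasses dict and routes attribute access through __getattr__/__setattr__, every value is reachable both as an attribute and as a key, and the whole object pickles like a plain dict (which is what dataproc.save_temp and load_temp rely on). A minimal usage sketch, assuming a folder of tif images exists; the values are hypothetical:

    from mtor.parameters import Parameters

    p = Parameters("mtor-bilder", 50, 875, 300, 725)  # folder, top, right, bottom, left
    p.peak_threshold          # 0.3, a default set in __init__
    p["peak_threshold"]       # the same value, via dict access
    p.guard_max_value = 1234  # derived values are attached later by the workflows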

mtor/postproc.py

@@ -0,0 +1,40 @@
import pathlib

from .commons import LABEL_SELECTED, LABEL_DISCARDED


def stem_file_list(selected_files):
    return {pathlib.Path(filename).stem for filename in selected_files}


def rename_color_coded_images(file_stems, parameters):
    rename_pairs = []
    for path in parameters.colored_dir.iterdir():
        if not path.stem.startswith("."):
            label = (
                LABEL_SELECTED if path.stem in file_stems else LABEL_DISCARDED
            )
            new_name = path.with_name(f"{path.stem}_{label}{path.suffix}")
            rename_pairs.append((path, new_name))
    return rename_pairs


def sort_cut_images(file_stems, parameters):
    sort_pairs = []
    for path in parameters.cuts_dir.iterdir():
        if not path.stem.startswith("."):
            label = (
                LABEL_SELECTED if path.stem in file_stems else LABEL_DISCARDED
            )
            new_path = (
                parameters[f"cuts_{label}_dir"]
                / f"{path.stem}_cut_{label}{path.suffix}"
            )
            sort_pairs.append((path, new_path))
    return sort_pairs


def remove_cuts_dir(parameters):
    for item in parameters["cuts_dir"].iterdir():
        item.unlink()
    parameters["cuts_dir"].rmdir()

mtor/prescan.py

@@ -1,163 +1,36 @@
-import re
-from collections import namedtuple
-from pathlib import Path
 from PIL import Image
 from tqdm import tqdm

+from .commons import OUTPUT_FOLDER_NAMES, IMG_MAX_INTENSITY

-RE_DIGITS = re.compile(r"(\d+)")
-LABEL_SELECTED = "selected"
-LABEL_DISCARDED = "discarded"
-TifImage = namedtuple("TifImage", ["path", "frame"])
-
-
-class Settings(dict):
-    """ namespacing the constant values for a analysis with nice access """
-
-    def __getattr__(self, key):
-        """ return a settings value as instance property """
-        return self[key]
-
-    def __setattr__(self, key, value):
-        """ set a settings value via instance property """
-        self[key] = value
-
-    def __getstate__(self):
-        # Copy the object's state from self.__dict__ which contains
-        # all our instance attributes. Always use the dict.copy()
-        # method to avoid modifying the original state.
-        return self.__dict__.copy()
-
-    def __setstate__(self, state):
-        # Restore instance attributes (i.e., filename and lineno).
-        self.__dict__.update(state)
-
-
-def scan_image_dir(settings):
-    """ scan a folder for tif images """
-    tif_dir = Path(settings.folder)
-    if not tif_dir.is_dir():
-        raise IOError(f"Not a directory: {settings.folder}")
-    visible = (i for i in tif_dir.iterdir() if not i.stem.startswith("."))
-    tifs = (item for item in visible if item.suffix == ".tif")
-    results = []
-    for path in tifs:
-        try:
-            match = RE_DIGITS.search(path.name).group()
-        except AttributeError as ex:
-            raise ValueError(
-                f"no sequence number found in {path.name}"
-            ) from ex
-        results.append(TifImage(path=path, frame=int(match)))
-    settings.tif_dir = tif_dir
-    settings.tif_list = results
-    return settings
-
-
-def prepare_output_dirs(settings):
-    dir_names = (
-        "colored",
-        "cuts",
-        f"cuts_{LABEL_DISCARDED}",
-        f"cuts_{LABEL_SELECTED}",
-        "data",
-    )
-    for subdir in dir_names:
-        sub_path = settings.tif_dir / subdir
-        sub_path.mkdir(exist_ok=True)
-        for item in sub_path.iterdir():
-            item.unlink()
-        settings[f"{subdir}_dir"] = sub_path
-    return settings
-
-
-def pre_scan_tifs(settings):
+def scan_tifs(parameters):
     """ scan a sequence of tif images for common values """
-    lowest = 2 ** 16
+    lowest = IMG_MAX_INTENSITY
     highest = 0
-    width = None
-    height = None
-    print("1/4: scanning tifs for common autocontrast values")
-    for tif in tqdm(settings.tif_list):
+    for tif in tqdm(parameters.tif_list):
         with tif.path.open("rb") as filehandle:
             image = Image.open(filehandle)
             mi, ma = image.getextrema()
             lowest = min(lowest, mi)
             highest = max(highest, ma)
-            if width is None:
-                width, height = image.size
-            elif width != image.width or height != image.height:
-                raise ValueError("Images have different sizes")
+            if (
+                image.width != parameters.image_width
+                or image.height != parameters.image_height
+            ):
+                raise ValueError("Images have different sizes")
-    print(f"intensity range: {lowest} to {highest}")
-    print(f"image size: {width} x {height} pixels")
-    settings.intensity_min = lowest
-    settings.intensity_max = highest
-    settings.image_width = width
-    settings.image_height = height
-    return settings
-
-
-def check_roi_size(settings, top, right, bottom, left):
-    if (
-        top > settings.image_height
-        or bottom > settings.image_height
-        or right > settings.image_width
-        or left > settings.image_width
-    ):
-        raise ValueError("ROI out of bounce")
-    if right < left:
-        left, right = right, left
-    if bottom < top:
-        top, bottom = bottom, top
-    settings.roi_top = top
-    settings.roi_bottom = bottom
-    settings.roi_right = right
-    settings.roi_left = left
-    settings.cut_top = min(settings.image_height, top - settings.cut_pad_y)
-    settings.cut_bottom = max(0, bottom + settings.cut_pad_y)
-    settings.cut_right = min(settings.image_width, right + settings.cut_pad_x)
-    settings.cut_left = max(0, left - settings.cut_pad_x)
-    return settings
-
-
-def prescan_workflow(folder, top, right, bottom, left, boost=10, **kargs):
-    settings = Settings()
-    settings.folder = folder
-    settings.boost = boost
-    settings.cut_pad_x = 50
-    settings.cut_pad_y = 25
-    settings.guard_histogram_bins = 100
-    settings.guard_filter_window = 7
-    settings.guard_filter_polynom = 3
-    settings.guards_minima_min_dist = 10
-    settings.rolling_min_window = 5
-    settings.savgol_filter_window = 51
-    settings.savgol_filter_polynom = 1
-    settings.peak_min_distance = 5
-    settings.peak_threshold = 0.3
-    settings.update(kargs)
-
-    settings = scan_image_dir(settings)
-    settings = prepare_output_dirs(settings)
-    settings = pre_scan_tifs(settings)
-    settings = check_roi_size(settings, top, right, bottom, left)
-    settings.offset = settings.intensity_min
-    max_offset = settings.intensity_max - settings.intensity_min
-    settings.scale = max_offset / 255 / boost
-    return settings
+    parameters.intensity_min = lowest
+    parameters.intensity_max = highest
+    return parameters
+
+
+def prepare_output_dirs(parameters):
+    for name in OUTPUT_FOLDER_NAMES:
+        sub_path = parameters[f"{name}_dir"]
+        sub_path.mkdir(exist_ok=True)
+        for item in sub_path.iterdir():
+            item.unlink()
+    return parameters

mtor/workflows.py

@@ -0,0 +1,108 @@
import multiprocessing
import functools

from tqdm import tqdm

from .commons import WorkflowResult
from .parameters import Parameters
from .prescan import scan_tifs, prepare_output_dirs
from .imageproc import process_one_tif
from .dataproc import (
    set_plotting_styles,
    construct_data_frame,
    find_guard_threshold,
    check_guards,
    find_outliers,
    select_on_guards_and_outliers,
    smooth_rolling_min,
    smooth_savgol_filter,
    find_extremas,
    save_data,
)
from .postproc import (
    stem_file_list,
    rename_color_coded_images,
    sort_cut_images,
    remove_cuts_dir,
)


def prescan_workflow(folder, top, right, bottom, left, **kargs):
    print("1/4: scanning tifs for common autocontrast values")
    parameters = Parameters(folder, top, right, bottom, left)
    parameters = scan_tifs(parameters)
    parameters.offset = parameters.intensity_min
    max_offset = parameters.intensity_max - parameters.intensity_min
    parameters.scale = max_offset / 255 / parameters.boost
    prepare_output_dirs(parameters)
    return WorkflowResult(None, parameters)


def image_workflow(parameters):
    print("2/4: Image analysis and conversion")
    func = functools.partial(process_one_tif, parameters=parameters)
    tif_list = parameters.tif_list
    num_items = len(tif_list)
    cpu_count = multiprocessing.cpu_count()
    with multiprocessing.Pool(cpu_count) as mpp:
        stat_results = list(tqdm(mpp.imap(func, tif_list), total=num_items))
    return WorkflowResult(stat_results, parameters)


def data_workflow(stats_results, parameters):
    from .dataproc import save_temp

    print("3/4: Data analysis")
    set_plotting_styles()
    data_frame = construct_data_frame(stats_results, parameters)
    save_temp(data_frame, parameters)
    find_guard_threshold(data_frame, parameters)
    data_frame = check_guards(data_frame, parameters)
    data_frame = find_outliers(data_frame, parameters)
    selected_df = select_on_guards_and_outliers(data_frame, parameters)
    selected_df = smooth_rolling_min(selected_df, parameters)
    selected_df = smooth_savgol_filter(selected_df, parameters)
    extremas_df = find_extremas(selected_df, parameters)
    save_data(data_frame, selected_df, extremas_df, parameters)
    return WorkflowResult(list(selected_df["file"]), parameters)


def postprocessing_workflow(selected_files, parameters):
    print("4/4: Post processing")
    file_stems = stem_file_list(selected_files)
    cc_rename_pairs = rename_color_coded_images(file_stems, parameters)
    cut_sort_pairs = sort_cut_images(file_stems, parameters)
    file_pairs = cc_rename_pairs + cut_sort_pairs
    for old_path, new_path in tqdm(file_pairs):
        old_path.rename(new_path)
    remove_cuts_dir(parameters)


def cached_data_workflow(folder):
    from .dataproc import load_temp

    print("3/4: Data analysis")
    set_plotting_styles()
    data_frame, parameters = load_temp(folder)
    find_guard_threshold(data_frame, parameters)
    data_frame = check_guards(data_frame, parameters)
    data_frame = find_outliers(data_frame, parameters)
    selected_df = select_on_guards_and_outliers(data_frame, parameters)
    selected_df = smooth_rolling_min(selected_df, parameters)
    selected_df = smooth_savgol_filter(selected_df, parameters)
    extremas_df = find_extremas(selected_df, parameters)
    save_data(data_frame, selected_df, extremas_df, parameters)
    return WorkflowResult(list(selected_df["file"]), parameters)
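The offset and scale computed in prescan_workflow drive the autocontrast in imageproc.scale_and_colorize, where every pixel becomes (value - offset) // scale; boost is the linear brightness factor described in the Parameters docstring. A worked example with made-up intensities:

    # hypothetical prescan result
    intensity_min, intensity_max, boost = 1200, 52200, 10
    offset = intensity_min                                 # 1200
    scale = (intensity_max - intensity_min) / 255 / boost  # 51000 / 2550 = 20.0
    (31200 - offset) // scale                              # a raw pixel of 31200 maps to 1500.0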