
restructured modules

master
Holger Frey 6 years ago
commit 59813146e7
  1. ideas.md (5)
  2. mtor/__init__.py (24)
  3. mtor/commons.py (38)
  4. mtor/dataproc.py (277)
  5. mtor/imageproc.py (112)
  6. mtor/parameters.py (179)
  7. mtor/postproc.py (40)
  8. mtor/prescan.py (161)
  9. mtor/workflows.py (108)

ideas.md (5)

@@ -0,0 +1,5 @@
Further Ideas:
- color the roi
- create a movie
- automatic use of cached values

mtor/__init__.py (24)

@@ -1,21 +1,23 @@
import warnings
from .prescan import prescan_workflow
from .imageproc import image_workflow
from .dataproc import data_workflow, cached_workflow
from .filesort import filesort_workflow
from .workflows import (
prescan_workflow,
image_workflow,
data_workflow,
cached_data_workflow,
postprocessing_workflow,
)
warnings.filterwarnings("ignore")
def run():
#settings = prescan_workflow("mtor-bilder", 50, 910, 300, 660)
settings = prescan_workflow("mtor-bilder", 50, 725+150, 300, 725)
stats_results = image_workflow(settings)
selected_files = data_workflow(stats_results, settings)
filesort_workflow(selected_files, settings)
# settings = prescan_workflow("mtor-bilder", 50, 910, 300, 660)
pw = prescan_workflow("mtor-bilder", 50, 725 + 150, 300, 725)
iw = image_workflow(pw.parameters)
dw = data_workflow(iw.data, iw.parameters)
fw = postprocessing_workflow(dw.data, dw.parameters) # noqa: F841
def run_data():
selected_files, settings = cached_workflow("mtor-bilder")
filesort_workflow(selected_files, settings)
cw = cached_data_workflow("mtor-bilder")
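Not part of the commit, a minimal usage sketch: after this restructuring the package entry points in mtor/__init__.py are meant to be called like this (the folder name is the one hard-coded in run() above).

import mtor

# full pipeline: prescan -> image analysis -> data analysis -> post processing
mtor.run()

# repeat only the data analysis, reusing the cached intermediate results
mtor.run_data()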

mtor/commons.py (38)

@@ -0,0 +1,38 @@
import functools
import numpy
import re
from collections import namedtuple
from .luts import FIRE_LUT # noqa: F401
IMG_MAX_INTENSITY = 65535 # 2^16 - 1
LABEL_SELECTED = "selected"
LABEL_DISCARDED = "discarded"
OUTPUT_FOLDER_NAMES = (
"colored",
"cuts",
f"cuts_{LABEL_DISCARDED}",
f"cuts_{LABEL_SELECTED}",
"data",
)
RE_DIGITS = re.compile(r"(\d+)")
ROI_STATISTIC_FUNCTIONS = {
"min": numpy.amin,
"max": numpy.amax,
"1quantile": functools.partial(numpy.percentile, q=25),
"2quantile": functools.partial(numpy.percentile, q=50),
"3quantile": functools.partial(numpy.percentile, q=75),
"average": numpy.average,
"median": numpy.median,
"std": numpy.std,
}
TifImage = namedtuple("TifImage", ["path", "frame"])
TifArray = namedtuple("TifArray", ["path", "frame", "data"])
WorkflowResult = namedtuple("WorkflowResult", ["data", "parameters"])
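For orientation only (not part of the commit): a short sketch of how the shared ROI_STATISTIC_FUNCTIONS table is applied to an image region, mirroring get_stats() in mtor/imageproc.py; the random array and the "roi" prefix are made up for the example.

import numpy
from mtor.commons import ROI_STATISTIC_FUNCTIONS

# illustrative 16-bit region of interest
roi = numpy.random.randint(0, 2**16, size=(64, 128), dtype=numpy.uint16)

# one "<prefix>.<statistic>" entry per function, e.g. "roi.median", "roi.3quantile"
stats = {f"roi.{name}": func(roi) for name, func in ROI_STATISTIC_FUNCTIONS.items()}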

mtor/dataproc.py (277)

@@ -6,13 +6,12 @@ import pickle
import seaborn
import matplotlib.pyplot as pyplot
from peakutils.plot import plot as peakplot
from scipy.signal import savgol_filter
from .imageproc import STATS_FUNCS, LEFT_GUARD, ROI_NAME, RIGHT_GUARD
from .commons import ROI_STATISTIC_FUNCTIONS
def peakplot_iloc(x, y, ind):
def peakplot_iloc(x, y, ix1, ix1_label="peaks", ix2=None, ix2_label="peaks"):
"""
Plots the original data with the peaks that were identified
@@ -29,14 +28,25 @@ def peakplot_iloc(x, y, ind):
ind : array-like
Indexes of the identified peaks
"""
pyplot.plot(x, y, "--")
pyplot.plot(x, y)
num_items = len(ix1)
pyplot.plot(
x.iloc[ind],
y.iloc[ind],
x.iloc[ix1],
y.iloc[ix1],
"r+",
ms=5,
mew=2,
label="{} peaks".format(len(ind)),
label=f"{num_items} {ix1_label}",
)
if ix2 is not None:
num_items = len(ix2)
pyplot.plot(
x.iloc[ix2],
y.iloc[ix2],
"g1",
ms=5,
mew=2,
label=f"{num_items} {ix2_label}",
)
pyplot.legend()
@@ -56,26 +66,31 @@ def set_plotting_styles():
seaborn.set_context("talk")
def init_pandas_data_frame():
def init_pandas_data_frame(parameters):
columns = ["file", "frame"]
for prefix in (LEFT_GUARD, ROI_NAME, RIGHT_GUARD):
for key in STATS_FUNCS:
columns.append(f"{prefix}.{key}")
groups = (
parameters.roi_name,
parameters.left_guard_name,
parameters.right_guard_name,
)
for group in groups:
for func_name in ROI_STATISTIC_FUNCTIONS:
columns.append(f"{group}.{func_name}")
return pandas.DataFrame(columns=columns)
def construct_data_frame(stats_results):
data_frame = init_pandas_data_frame()
def construct_data_frame(stats_results, parameters):
data_frame = init_pandas_data_frame(parameters)
data_frame = data_frame.append(stats_results, ignore_index=True)
return data_frame.sort_values(by=["frame"]).reset_index(drop=True)
def find_guard_value(data_frame, settings):
left_avg = data_frame["left.average"]
right_avg = data_frame["right.average"]
guards_avg = left_avg.append(right_avg, ignore_index=True)
def find_guard_threshold(data_frame, parameters):
left_values = data_frame[parameters.left_guard_column]
right_values = data_frame[parameters.right_guard_column]
guard_values = left_values.append(right_values, ignore_index=True)
guard_data = numpy.histogram(
guards_avg, bins=settings.guard_histogram_bins
guard_values, bins=parameters.guard_histogram_bins
)
guard_counts = guard_data[0].astype(numpy.float16)
guard_edges = guard_data[1][1:] # edges enclose the counts
@@ -85,13 +100,15 @@ def find_guard_value(data_frame, settings):
pyplot.title("Histogram of Guard Avarages (not filtered)")
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = settings.data_dir / "1-histogram-of-guard-avarages-not-filtered.png"
path = (
parameters.data_dir / "1-histogram-of-guard-avarages-not-filtered.png"
)
pyplot.savefig(str(path))
guard_counts_filtered = savgol_filter(
guard_counts,
settings.guard_filter_window,
settings.guard_filter_polynom,
parameters.guard_filter_window,
parameters.guard_filter_polynom,
)
pyplot.clf()
@@ -99,7 +116,7 @@ def find_guard_value(data_frame, settings):
pyplot.title("Histogram of Guard Avarages (Savitzky-Golay filter)")
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = settings.data_dir / "2-histogram-of-guard-avarages-filtered.png"
path = parameters.data_dir / "2-histogram-of-guard-avarages-filtered.png"
pyplot.savefig(str(path))
# Finding the first minima after the first peak
@@ -114,47 +131,53 @@ def find_guard_value(data_frame, settings):
inverted_series = max(guard_counts_filtered) - guard_counts_filtered
indexes = peakutils.indexes(
inverted_series[first_peak_position:],
min_dist=settings.guards_minima_min_dist,
min_dist=parameters.guards_minima_min_dist,
)
# since we shortened the data, we need to add the first peak position
first_minima_position = indexes[0] + first_peak_position
settings.guard_max_value = guard_edges[first_minima_position]
parameters.guard_max_value = guard_edges[first_minima_position]
pyplot.clf()
peakplot(guard_edges, guard_counts_filtered, [first_minima_position])
peakplot_iloc(
pandas.Series(guard_edges), pandas.Series(guard_counts_filtered), [first_minima_position], "minima"
)
pyplot.title(
(
f"Histogram of Guard Avarages (Savitzky-Golay filter),"
f" first minima at {int(settings.guard_max_value)} au"
f" first minima at {int(parameters.guard_max_value)} au"
)
)
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = (
settings.data_dir
parameters.data_dir
/ "3-histogram-of-guard-avarages-filtered-with-first-minima.png"
)
pyplot.savefig(str(path))
def check_guards(data_frame, settings):
for what in (LEFT_GUARD, RIGHT_GUARD):
def check_guards(data_frame, parameters):
for what in (parameters.left_guard_name, parameters.right_guard_name):
ok_col = f"{what}.ok"
data_frame[ok_col] = False
mask = data_frame[f"{what}.average"] < settings.guard_max_value
mask = (
data_frame[f"{what}.{parameters.guard_stats}"]
< parameters.guard_max_value
)
data_frame.loc[mask, ok_col] = True
data_frame["guards.ok"] = (
data_frame[f"{LEFT_GUARD}.ok"] & data_frame[f"{RIGHT_GUARD}.ok"]
data_frame[f"{parameters.left_guard_name}.ok"]
& data_frame[f"{parameters.right_guard_name}.ok"]
)
mask = data_frame["guards.ok"] == True
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame",
y=f"{ROI_NAME}.average",
y=parameters.roi_column,
data=data_frame,
hue="guards.ok",
hue_order=[True, False],
@@ -163,13 +186,13 @@ def check_guards(data_frame, settings):
pyplot.title("Selection based on guard values")
pyplot.ylabel("Average Intensity [au]")
pyplot.ylabel("Frame Number [1]")
path = settings.data_dir / "4-image-selection-based-on-guard-values.png"
path = parameters.data_dir / "4-image-selection-based-on-guard-values.png"
pyplot.savefig(str(path))
settings.charts_y_limit = ax.get_ylim()
parameters.charts_y_limit = ax.get_ylim()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=f"{ROI_NAME}.average", data=guarded_df
x="frame", y=parameters.roi_column, data=guarded_df
)
count_all_images = len(data_frame)
count_guarded_images = len(guarded_df)
@@ -181,81 +204,80 @@ def check_guards(data_frame, settings):
)
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(settings.charts_y_limit)
path = settings.data_dir / "5-selected-values-based-on-guard-values.png"
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / "5-selected-values-based-on-guard-values.png"
pyplot.savefig(str(path))
return data_frame
def find_outliers(data_frame, settings):
def find_outliers(data_frame, parameters):
mask = data_frame["guards.ok"] == True
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
seaborn.boxplot(data=guarded_df, x=f"{ROI_NAME}.average")
seaborn.boxplot(data=guarded_df, x=parameters.roi_column)
pyplot.title(f"Boxblot of guarded values")
pyplot.xlabel("Average Intensity [au]")
path = settings.data_dir / "6-boxplot-of-guarded-values.png"
path = parameters.data_dir / "6-boxplot-of-guarded-values.png"
pyplot.savefig(str(path))
lower_quartil = guarded_df[f"{ROI_NAME}.average"].quantile(0.25)
upper_quartil = guarded_df[f"{ROI_NAME}.average"].quantile(0.75)
lower_quartil = guarded_df[parameters.roi_column].quantile(0.25)
upper_quartil = guarded_df[parameters.roi_column].quantile(0.75)
inter_quartil_range = upper_quartil - lower_quartil
settings.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
data_frame["outlier.ok"] = (
data_frame[f"{ROI_NAME}.average"] < settings.outlier_upper_limit
data_frame[parameters.roi_column] < parameters.outlier_upper_limit
)
return data_frame
def select_value_on_guards_and_outliers(data_frame, settings):
def select_on_guards_and_outliers(data_frame, parameters):
data_frame["outlier_guards.ok"] = (
data_frame["guards.ok"] & data_frame["outlier.ok"]
)
mask = data_frame["outlier_guards.ok"] == True
mask = data_frame["outlier_guards.ok"] == True # noqa: E712
selected_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=f"{ROI_NAME}.average", data=selected_df
x="frame", y=parameters.roi_column, data=selected_df
)
pyplot.title(f"Selected Images, outliers removed")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(settings.charts_y_limit)
path = settings.data_dir / "7-selected-images-outliers-removed.png"
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / "7-selected-images-outliers-removed.png"
pyplot.savefig(str(path))
return selected_df
def smooth_rolling_min(selected_df, settings):
def smooth_rolling_min(selected_df, parameters):
rm = (
selected_df[f"{ROI_NAME}.average"]
.rolling(settings.rolling_min_window)
selected_df[parameters.roi_column]
.rolling(parameters.rolling_min_window)
.min()
)
# after a rolling window calculation, the first values will be NaN,
# we need to fill them
selected_df[f"{ROI_NAME}.average.rolling.min"] = rm.fillna(
selected_df[f"{parameters.roi_name}.rolling.min"] = rm.fillna(
method="backfill"
)
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=f"{ROI_NAME}.average.rolling.min", data=selected_df
x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df
)
pyplot.title(f"Selected Images, outliers removed, rolling min applied")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(settings.charts_y_limit)
ax.set_ylim(parameters.charts_y_limit)
path = (
settings.data_dir
parameters.data_dir
/ "8-selected-images-outliers-removed-rolling-min-applied.png"
)
pyplot.savefig(str(path))
@@ -263,17 +285,17 @@ def smooth_rolling_min(selected_df, settings):
return selected_df
def smooth_savgol_filter(selected_df, settings):
def smooth_savgol_filter(selected_df, parameters):
filtered = savgol_filter(
selected_df[f"{ROI_NAME}.average.rolling.min"],
settings.savgol_filter_window,
settings.savgol_filter_polynom,
selected_df[f"{parameters.roi_name}.rolling.min"],
parameters.savgol_filter_window,
parameters.savgol_filter_polynom,
)
selected_df[f"{ROI_NAME}.average.savgol"] = filtered
selected_df[f"{parameters.roi_name}.savgol"] = filtered
pyplot.clf()
seaborn.lineplot(
x="frame", y=f"{ROI_NAME}.average.savgol", data=selected_df
x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df
)
pyplot.title(
(
@@ -284,7 +306,7 @@ def smooth_savgol_filter(selected_df, settings):
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = (
settings.data_dir
parameters.data_dir
/ "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png"
)
pyplot.savefig(str(path))
@@ -292,46 +314,50 @@ def smooth_savgol_filter(selected_df, settings):
return selected_df
def find_extremas(selected_df, settings):
def find_extremas(selected_df, parameters):
indexes = peakutils.indexes(
selected_df[f"{ROI_NAME}.average.savgol"],
thres=settings.peak_threshold,
min_dist=settings.peak_min_distance,
max_indexes = peakutils.indexes(
selected_df[f"{parameters.roi_name}.savgol"],
thres=parameters.peak_threshold,
min_dist=parameters.peak_min_distance,
)
maximas = selected_df.iloc[indexes].copy()
maximas = selected_df.iloc[max_indexes].copy()
pyplot.clf()
peakplot_iloc(
selected_df["frame"],
selected_df[f"{ROI_NAME}.average.savgol"],
indexes,
selected_df[f"{parameters.roi_name}.savgol"],
max_indexes,
"maxima",
)
pyplot.title(f"Finding Maximas")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = settings.data_dir / "10-finding-maximas.png"
path = parameters.data_dir / "10-finding-maximas.png"
pyplot.savefig(str(path))
inverted_series = (
max(selected_df[f"{ROI_NAME}.average.savgol"])
- selected_df[f"{ROI_NAME}.average.savgol"]
max(selected_df[f"{parameters.roi_name}.savgol"])
- selected_df[f"{parameters.roi_name}.savgol"]
)
indexes = peakutils.indexes(
inverted_series, min_dist=settings.peak_min_distance
min_indexes = peakutils.indexes(
inverted_series, min_dist=parameters.peak_min_distance
)
minimas = selected_df.iloc[indexes].copy()
minimas = selected_df.iloc[min_indexes].copy()
pyplot.clf()
peakplot_iloc(
selected_df["frame"],
selected_df[f"{ROI_NAME}.average.savgol"],
indexes,
selected_df[f"{parameters.roi_name}.savgol"],
max_indexes,
"maxima",
min_indexes,
"minima",
)
pyplot.title(f"Finding Minimas")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = settings.data_dir / "11-finding-minimas.png"
path = parameters.data_dir / "11-finding-minimas.png"
pyplot.savefig(str(path))
maximas["is_maxima"] = True
@@ -340,91 +366,38 @@ def find_extremas(selected_df, settings):
return extremas_df
def save_data(data_frame, selected_df, extremas_df, settings):
path = settings.data_dir / "numeric-data.xlsx"
def save_data(data_frame, selected_df, extremas_df, parameters):
path = parameters.data_dir / "numeric-data.xlsx"
writer = pandas.ExcelWriter(path, engine="xlsxwriter")
extremas_df.to_excel(writer, sheet_name="extremas")
selected_df.to_excel(writer, sheet_name="selected data")
data_frame.to_excel(writer, sheet_name="raw data")
ignore_settings = {"tif_list", "cuts_dir"}
tmp_settings = {
k: [v] for k, v in settings.items() if k not in ignore_settings
ignore_parameters = {"tif_list", "cuts_dir"}
tmp_parameters = {
k: [v] for k, v in parameters.items() if k not in ignore_parameters
}
tmp_setings_df = pandas.DataFrame(tmp_settings).T
tmp_setings_df = pandas.DataFrame(tmp_parameters).T
tmp_setings_df.to_excel(writer, sheet_name="parameters")
writer.save()
def data_workflow(stats_results, settings):
print("3/4: Data analysis")
set_plotting_styles()
data_frame = construct_data_frame(stats_results)
save_temp(data_frame, settings)
find_guard_value(data_frame, settings)
data_frame = check_guards(data_frame, settings)
data_frame = find_outliers(data_frame, settings)
selected_df = select_value_on_guards_and_outliers(data_frame, settings)
selected_df = smooth_rolling_min(selected_df, settings)
selected_df = smooth_savgol_filter(selected_df, settings)
extremas_df = find_extremas(selected_df, settings)
save_data(data_frame, selected_df, extremas_df, settings)
return list(selected_df["file"])
def save_temp(data_frame, settings):
csv_path = settings.tif_dir / "_data.csv"
def save_temp(data_frame, parameters):
csv_path = parameters.tif_dir / "_data.csv"
data_frame.to_csv(csv_path, sep="\t")
xls_path = settings.tif_dir / "_data.xlsx"
data_frame.to_excel(xls_path)
settings_path = settings.tif_dir / "_settings.pickle"
with open(settings_path, "wb") as fh:
pickle.dump(settings, fh)
parameters_path = parameters.tif_dir / "_parameters.pickle"
with open(parameters_path, "wb") as fh:
pickle.dump(parameters, fh)
def load_temp(folder):
dir_path = pathlib.Path(folder)
csv_path = dir_path / "_data.csv"
data_frame = pandas.read_csv(csv_path, sep="\t", index_col=0)
settings_path = dir_path / "_settings.pickle"
with open(settings_path, "rb") as fh:
settings = pickle.load(fh)
settings.cut_pad_x = 50
settings.cut_pad_y = 25
settings.guard_histogram_bins = 100
settings.guard_filter_window = 7
settings.guard_filter_polynom = 3
settings.guards_minima_min_dist = 10
settings.rolling_min_window = 5
settings.savgol_filter_window = 51
settings.savgol_filter_polynom = 1
settings.peak_min_distance = 5
settings.peak_threshold = 0.3
return data_frame, settings
def cached_workflow(folder):
print("3/4: Data analysis")
set_plotting_styles()
data_frame, settings = load_temp(folder)
find_guard_value(data_frame, settings)
data_frame = check_guards(data_frame, settings)
data_frame = find_outliers(data_frame, settings)
selected_df = select_value_on_guards_and_outliers(data_frame, settings)
selected_df = smooth_rolling_min(selected_df, settings)
selected_df = smooth_savgol_filter(selected_df, settings)
extremas_df = find_extremas(selected_df, settings)
save_data(data_frame, selected_df, extremas_df, settings)
return list(selected_df["file"]), settings
parameters_path = dir_path / "_parameters.pickle"
with open(parameters_path, "rb") as fh:
parameters = pickle.load(fh)
return data_frame, parameters
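Not part of the commit: a compact, self-contained sketch of the guard threshold idea in find_guard_threshold() above. It histograms the guard values, smooths the counts with a Savitzky-Golay filter, then locates the first minimum after the first peak by running peakutils.indexes on the inverted, truncated series. The synthetic values and filter settings are illustrative, and picking the first peak with peakutils on the smoothed counts is an assumption; the diff only shows the minimum search.

import numpy
import peakutils
from scipy.signal import savgol_filter

# synthetic guard averages: many dark frames, a few bright (contaminated) ones
values = numpy.concatenate([
    numpy.random.normal(200, 20, 500),
    numpy.random.normal(800, 50, 100),
])
counts, edges = numpy.histogram(values, bins=100)
smoothed = savgol_filter(counts.astype(float), 7, 3)

first_peak = peakutils.indexes(smoothed, min_dist=10)[0]
inverted = smoothed.max() - smoothed
# the series was shortened, so shift the found index back by the first peak position
first_minimum = peakutils.indexes(inverted[first_peak:], min_dist=10)[0] + first_peak
guard_max_value = edges[1:][first_minimum]  # edges enclose the counts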

mtor/imageproc.py (112)

@@ -1,32 +1,13 @@
from collections import namedtuple
from functools import partial
from multiprocessing import Pool
from PIL import Image
from tqdm import tqdm
import numpy
from .luts import FIRE_LUT
STATS_FUNCS = {
"min": numpy.amin,
"max": numpy.amax,
"1quantile": partial(numpy.percentile, q=25),
"2quantile": partial(numpy.percentile, q=50),
"3quantile": partial(numpy.percentile, q=75),
"average": numpy.average,
"median": numpy.median,
"std": numpy.std,
}
LEFT_GUARD = "left"
ROI_NAME = "roi"
RIGHT_GUARD = "right"
ROI_BORDER_INTENSITY = 2**15
TifArray = namedtuple("TifArray", ["path", "frame", "data"])
from .commons import (
TifArray,
IMG_MAX_INTENSITY,
FIRE_LUT,
ROI_STATISTIC_FUNCTIONS,
)
def open_as_array(tif_image):
@@ -38,19 +19,19 @@ def open_as_array(tif_image):
)
def scale_and_colorize(tif_array, settings):
adjusted_array = (tif_array.data - settings.offset) // settings.scale
def scale_and_colorize(tif_array, parameters):
adjusted_array = (tif_array.data - parameters.offset) // parameters.scale
# paint roi
top = settings.roi_top
bottom = settings.roi_bottom
right = settings.roi_right
left = settings.roi_left
adjusted_array[top, left:right] = ROI_BORDER_INTENSITY
adjusted_array[bottom, left:right] = ROI_BORDER_INTENSITY
adjusted_array[top:bottom, left] = ROI_BORDER_INTENSITY
adjusted_array[top:bottom, right] = ROI_BORDER_INTENSITY
top = parameters.roi_top
bottom = parameters.roi_bottom
right = parameters.roi_right
left = parameters.roi_left
adjusted_array[top, left:right] = IMG_MAX_INTENSITY
adjusted_array[bottom, left:right] = IMG_MAX_INTENSITY
adjusted_array[top:bottom, left] = IMG_MAX_INTENSITY
adjusted_array[top:bottom, right] = IMG_MAX_INTENSITY
adjusted = Image.fromarray(adjusted_array)
converted = adjusted.convert(mode="L").convert(mode="P")
@@ -58,21 +39,21 @@ def scale_and_colorize(tif_array, settings):
return converted.convert("RGB")
def safe_color_coded(tif_array, settings):
png_image = scale_and_colorize(tif_array, settings)
png_path = settings.colored_dir / (tif_array.path.stem + ".jpg")
def safe_color_coded(tif_array, parameters):
png_image = scale_and_colorize(tif_array, parameters)
png_path = parameters.colored_dir / (tif_array.path.stem + ".jpg")
with png_path.open("wb") as outhandle:
png_image.save(outhandle)
del png_image
def safe_cut(tif_array, settings):
cut_path = settings.cuts_dir / (tif_array.path.stem + "_cut.tif")
def safe_cut(tif_array, parameters):
cut_path = parameters.cuts_dir / tif_array.path.name
top = settings.cut_top
bottom = settings.cut_bottom
right = settings.cut_right
left = settings.cut_left
top = parameters.cut_top
bottom = parameters.cut_bottom
right = parameters.cut_right
left = parameters.cut_left
with cut_path.open("wb") as outhandle:
cut_array = tif_array.data[top:bottom, left:right]
@@ -81,42 +62,35 @@ def safe_cut(tif_array, settings):
del cut_img
def get_roi(tif_array, settings):
top = settings.roi_top
bottom = settings.roi_bottom
right = settings.roi_right
left = settings.roi_left
def get_roi(tif_array, parameters):
top = parameters.roi_top
bottom = parameters.roi_bottom
right = parameters.roi_right
left = parameters.roi_left
return tif_array.data[top:bottom, left:right]
def analyse_roi(tif_array, settings):
data = get_roi(tif_array, settings)
def analyse_roi(tif_array, parameters):
data = get_roi(tif_array, parameters)
results = {}
results.update(get_stats(data[:, 1], LEFT_GUARD))
results.update(get_stats(data[:, 1:-1], ROI_NAME))
results.update(get_stats(data[:, -1], RIGHT_GUARD))
results.update(get_stats(data[:, 1], parameters.left_guard_name))
results.update(get_stats(data[:, 1:-1], parameters.roi_name))
results.update(get_stats(data[:, -1], parameters.right_guard_name))
return results
def get_stats(array, prefix):
return {f"{prefix}.{k}": f(array) for k, f in STATS_FUNCS.items()}
return {
f"{prefix}.{name}": func(array)
for name, func in ROI_STATISTIC_FUNCTIONS.items()
}
def process_one_tif(tif_image, settings):
def process_one_tif(tif_image, parameters):
tif_array = open_as_array(tif_image)
safe_color_coded(tif_array, settings)
safe_cut(tif_array, settings)
stats_result = analyse_roi(tif_array, settings)
safe_color_coded(tif_array, parameters)
safe_cut(tif_array, parameters)
stats_result = analyse_roi(tif_array, parameters)
stats_result["file"] = tif_image.path.name
stats_result["frame"] = tif_image.frame
return stats_result
def image_workflow(settings):
print("2/4: Image analysis and conversion")
func = partial(process_one_tif, settings=settings)
tif_list = settings.tif_list
num_items = len(tif_list)
with Pool(4) as p:
stat_results = list(tqdm(p.imap(func, tif_list), total=num_items))
return stat_results
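A worked example (not part of the commit) of the autocontrast arithmetic: prescan_workflow() in mtor/workflows.py stores the global minimum intensity as offset and maps the remaining range to 8 bit divided by the brightness boost, and scale_and_colorize() above applies it per pixel; the intensity numbers below are made up.

# made-up global intensities from the prescan
intensity_min, intensity_max, boost = 400, 15400, 10
offset = intensity_min
scale = (intensity_max - intensity_min) / 255 / boost  # about 5.9 counts per output step
adjusted = (3400 - offset) // scale                    # 510.0, boosted beyond a plain 0-255 mapping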

mtor/parameters.py (179)

@@ -0,0 +1,179 @@
import pathlib
from PIL import Image
from .commons import RE_DIGITS, OUTPUT_FOLDER_NAMES, TifImage
class Parameters(dict):
""" namespacing the parameters for a analysis with nice access
the following parameters are used:
boost: linear factor for brightness on color coded images
charts_y_limit: common scale for the y axis in graphs
colored_dir: path to the directory to hold color coded images
cut_bottom: bottom edge for cropping an image
cut_left: left edge for cropping an image
cut_pad_x: pixels added to left or right edge to crop an image
cut_pad_y: pixels added to top or bottom edge to crop an image
cut_right: right edge for cropping an image
cut_top: top edge for cropping an image
cuts_discarded_dir: path to the directory of discarded cropped images
cuts_selected_dir: path to the directory of selected cropped images
data_dir: path to the directory for graphs and numeric data
folder: name of the directory that holds the tif images
guard_filter_polynom: polynomial order for smoothing the guards histogram
guard_filter_window: window size for smoothing the guards histogram
guard_histogram_bins: number of bins when constructing the guards histogram
guard_max_value: calculated threshold for the guards
guard_stats: which statistic to use for the calculation of the guards
guards_minima_min_dist: min distance between minima in the guards histogram
image_height: height of an image
image_width: width of an image
intensity_max: highest intensity found in all images
intensity_min: lowest intensity found in all images
left_guard_column: name of the most used left guard value column
left_guard_name: left guard value group name
offset: calculated lowest intensity, subtracted before scaling
outlier_upper_limit: calculated threshold value for outliers
peak_min_distance: min distance for peaks in the final results
peak_threshold: threshold peak height in the final results
right_guard_column: name of the most used right guard value column
right_guard_name: right guard value group name
roi_bottom: bottom edge of the ROI
roi_column: name of the most used roi value column
roi_left: left edge of the ROI
roi_name: roi value group name
roi_right: right edge of the ROI
roi_stats: which statistic to use for the calculation of the roi
roi_top: top edge of the ROI
rolling_min_window: size of the window for the rolling min calculation
savgol_filter_polynom: polynomial order for smoothing the final results
savgol_filter_window: window size for smoothing the final results
scale: calculated factor to boost image intensities
tif_dir: path to the directory with the original tif images
"""
def __init__(self, folder, top, right, bottom, left, **kargs):
super().__init__(self)
self.folder = folder
# defaults
self.boost = 10
self.cut_pad_x = 50
self.cut_pad_y = 25
self.guard_histogram_bins = 100
self.guard_filter_window = 7
self.guard_filter_polynom = 3
self.guards_minima_min_dist = 10
self.rolling_min_window = 5
self.savgol_filter_window = 51
self.savgol_filter_polynom = 1
self.peak_min_distance = 5
self.peak_threshold = 0.3
# labels for data items
self.roi_name = "roi"
self.roi_stats = "average"
self.roi_column = f"{self.roi_name}.{self.roi_stats}"
self.left_guard_name = "left"
self.right_guard_name = "right"
self.guard_stats = "average"
self.left_guard_column = f"{self.left_guard_name}.{self.guard_stats}"
self.right_guard_column = f"{self.right_guard_name}.{self.guard_stats}"
# arbitrary keyword arguments, updating defaults if necessary
self.update(kargs)
# create list of images
self.tif_dir = pathlib.Path(folder)
if not self.tif_dir.is_dir():
raise IOError(f"Not a directory: {self.folder}")
self.tif_list = self.scan_image_dir()
# set the names of output directories
for name in OUTPUT_FOLDER_NAMES:
sub_path = self.tif_dir / name
self[f"{name}_dir"] = sub_path
# get image dimensions
width, height = self.get_size_of_one_image()
self.image_width = width
self.image_height = height
self.set_roi_and_cut_size(top, right, bottom, left)
def __getattr__(self, key):
""" return a parameters value as instance property """
return self[key]
def __setattr__(self, key, value):
""" set a parameters value via instance property """
self[key] = value
def __getstate__(self):
# Copy the object's state from self.__dict__ which contains
# all our instance attributes. Always use the dict.copy()
# method to avoid modifying the original state.
return self.__dict__.copy()
def __setstate__(self, state):
# Restore instance attributes (i.e., filename and lineno).
self.__dict__.update(state)
def scan_image_dir(self):
""" scan a folder for tif images """
visible = (
item
for item in self.tif_dir.iterdir()
if not item.stem.startswith(".")
)
tifs = (item for item in visible if item.suffix == ".tif")
results = []
for path in tifs:
try:
match = RE_DIGITS.search(path.name).group()
except AttributeError as ex:
raise ValueError(
f"no sequence number found in {path.name}"
) from ex
results.append(TifImage(path=path, frame=int(match)))
return results
def get_size_of_one_image(self):
""" get the dimension of one Image
the dimensions should be the same on all images. this is ensured in
an other workflow.
"""
image = self.tif_list[0]
with open(image.path, "rb") as filehandle:
image = Image.open(filehandle)
size = image.size
return size
def set_roi_and_cut_size(self, top, right, bottom, left):
if right < left:
left, right = right, left
if top > bottom:
top, bottom = bottom, top
if (
top < 0
or left < 0
or bottom > self.image_height
or right > self.image_width
):
raise ValueError("ROI out of bounce")
self.roi_top = top
self.roi_bottom = bottom
self.roi_right = right
self.roi_left = left
self.cut_top = min(self.image_height, top - self.cut_pad_y)
self.cut_bottom = max(0, bottom + self.cut_pad_y)
self.cut_right = min(self.image_width, right + self.cut_pad_x)
self.cut_left = max(0, left - self.cut_pad_x)
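Not part of the commit: a short sketch of how the new Parameters mapping is intended to be used, based on prescan_workflow() in mtor/workflows.py; the folder name and ROI coordinates are placeholders and the folder must already contain the tif series.

from mtor.parameters import Parameters

# folder plus ROI edges (top, right, bottom, left); keyword arguments override defaults
params = Parameters("mtor-bilder", 50, 875, 300, 725, boost=10)

# dict-style and attribute-style access are interchangeable
assert params.roi_column == "roi.average"
colored_dir = params["colored_dir"]
params.guard_histogram_bins = 50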

mtor/postproc.py (40)

@@ -0,0 +1,40 @@
import pathlib
from .commons import LABEL_SELECTED, LABEL_DISCARDED
def stem_file_list(selected_files):
return {pathlib.Path(filename).stem for filename in selected_files}
def rename_color_coded_images(file_stems, parameters):
rename_pairs = []
for path in parameters.colored_dir.iterdir():
if not path.stem.startswith("."):
label = (
LABEL_SELECTED if path.stem in file_stems else LABEL_DISCARDED
)
new_name = path.with_name(f"{path.stem}_{label}{path.suffix}")
rename_pairs.append((path, new_name))
return rename_pairs
def sort_cut_images(file_stems, parameters):
sort_pairs = []
for path in parameters.cuts_dir.iterdir():
if not path.stem.startswith("."):
label = (
LABEL_SELECTED if path.stem in file_stems else LABEL_DISCARDED
)
new_path = (
parameters[f"cuts_{label}_dir"]
/ f"{path.stem}_cut_{label}{path.suffix}"
)
sort_pairs.append((path, new_path))
return sort_pairs
def remove_cuts_dir(parameters):
for item in parameters["cuts_dir"].iterdir():
item.unlink()
parameters["cuts_dir"].rmdir()

mtor/prescan.py (161)

@@ -1,163 +1,36 @@
import re
from collections import namedtuple
from pathlib import Path
from PIL import Image
from tqdm import tqdm
RE_DIGITS = re.compile(r"(\d+)")
LABEL_SELECTED = "selected"
LABEL_DISCARDED = "discarded"
TifImage = namedtuple("TifImage", ["path", "frame"])
class Settings(dict):
""" namespacing the constant values for a analysis with nice access """
def __getattr__(self, key):
""" return a settings value as instance property """
return self[key]
def __setattr__(self, key, value):
""" set a settings value via instance property """
self[key] = value
def __getstate__(self):
# Copy the object's state from self.__dict__ which contains
# all our instance attributes. Always use the dict.copy()
# method to avoid modifying the original state.
return self.__dict__.copy()
def __setstate__(self, state):
# Restore instance attributes (i.e., filename and lineno).
self.__dict__.update(state)
def scan_image_dir(settings):
""" scan a folder for tif images """
tif_dir = Path(settings.folder)
if not tif_dir.is_dir():
raise IOError(f"Not a directory: {settings.folder}")
visible = (i for i in tif_dir.iterdir() if not i.stem.startswith("."))
tifs = (item for item in visible if item.suffix == ".tif")
results = []
for path in tifs:
try:
match = RE_DIGITS.search(path.name).group()
except AttributeError as ex:
raise ValueError(
f"no sequence number found in {path.name}"
) from ex
results.append(TifImage(path=path, frame=int(match)))
settings.tif_dir = tif_dir
settings.tif_list = results
return settings
def prepare_output_dirs(settings):
dir_names = (
"colored",
"cuts",
f"cuts_{LABEL_DISCARDED}",
f"cuts_{LABEL_SELECTED}",
"data",
)
for subdir in dir_names:
sub_path = settings.tif_dir / subdir
sub_path.mkdir(exist_ok=True)
for item in sub_path.iterdir():
item.unlink()
settings[f"{subdir}_dir"] = sub_path
return settings
from .commons import OUTPUT_FOLDER_NAMES, IMG_MAX_INTENSITY
def pre_scan_tifs(settings):
def scan_tifs(parameters):
""" scan a sequence of tif images for common values """
lowest = 2 ** 16
lowest = IMG_MAX_INTENSITY
highest = 0
width = None
height = None
print("1/4: scanning tifs for common autocontrast values")
for tif in tqdm(settings.tif_list):
for tif in tqdm(parameters.tif_list):
with tif.path.open("rb") as filehandle:
image = Image.open(filehandle)
mi, ma = image.getextrema()
lowest = min(lowest, mi)
highest = max(highest, ma)
if width is None:
width, height = image.size
elif width != image.width or height != image.height:
raise ValueError("Images have different sizes")
print(f"intensity range: {lowest} to {highest}")
print(f"image size: {width} x {height} pixels")
settings.intensity_min = lowest
settings.intensity_max = highest
settings.image_width = width
settings.image_height = height
return settings
def check_roi_size(settings, top, right, bottom, left):
if (
top > settings.image_height
or bottom > settings.image_height
or right > settings.image_width
or left > settings.image_width
image.width != parameters.image_width
or image.height != parameters.image_height
):
raise ValueError("ROI out of bounce")
if right < left:
left, right = right, left
if bottom < top:
top, bottom = bottom, top
settings.roi_top = top
settings.roi_bottom = bottom
settings.roi_right = right
settings.roi_left = left
settings.cut_top = min(settings.image_height, top - settings.cut_pad_y)
settings.cut_bottom = max(0, bottom + settings.cut_pad_y)
settings.cut_right = min(settings.image_width, right + settings.cut_pad_x)
settings.cut_left = max(0, left - settings.cut_pad_x)
return settings
def prescan_workflow(folder, top, right, bottom, left, boost=10, **kargs):
settings = Settings()
settings.folder = folder
settings.boost = boost
settings.cut_pad_x = 50
settings.cut_pad_y = 25
settings.guard_histogram_bins = 100
settings.guard_filter_window = 7
settings.guard_filter_polynom = 3
settings.guards_minima_min_dist = 10
settings.rolling_min_window = 5
settings.savgol_filter_window = 51
settings.savgol_filter_polynom = 1
settings.peak_min_distance = 5
settings.peak_threshold = 0.3
raise ValueError("Images have different sizes")
settings.update(kargs)
parameters.intensity_min = lowest
parameters.intensity_max = highest
settings = scan_image_dir(settings)
settings = prepare_output_dirs(settings)
settings = pre_scan_tifs(settings)
settings = check_roi_size(settings, top, right, bottom, left)
return parameters
settings.offset = settings.intensity_min
max_offset = settings.intensity_max - settings.intensity_min
settings.scale = max_offset / 255 / boost
return settings
def prepare_output_dirs(parameters):
for name in OUTPUT_FOLDER_NAMES:
sub_path = parameters[f"{name}_dir"]
sub_path.mkdir(exist_ok=True)
for item in sub_path.iterdir():
item.unlink()
return parameters

mtor/workflows.py (108)

@@ -0,0 +1,108 @@
import multiprocessing
import functools
from tqdm import tqdm
from .commons import WorkflowResult
from .parameters import Parameters
from .prescan import scan_tifs, prepare_output_dirs
from .imageproc import process_one_tif
from .dataproc import (
set_plotting_styles,
construct_data_frame,
find_guard_threshold,
check_guards,
find_outliers,
select_on_guards_and_outliers,
smooth_rolling_min,
smooth_savgol_filter,
find_extremas,
save_data,
)
from .postproc import (
stem_file_list,
rename_color_coded_images,
sort_cut_images,
remove_cuts_dir,
)
def prescan_workflow(folder, top, right, bottom, left, **kargs):
print("1/4: scanning tifs for common autocontrast values")
parameters = Parameters(folder, top, right, bottom, left)
parameters = scan_tifs(parameters)
parameters.offset = parameters.intensity_min
max_offset = parameters.intensity_max - parameters.intensity_min
parameters.scale = max_offset / 255 / parameters.boost
prepare_output_dirs(parameters)
return WorkflowResult(None, parameters)
def image_workflow(parameters):
print("2/4: Image analysis and conversion")
func = functools.partial(process_one_tif, parameters=parameters)
tif_list = parameters.tif_list
num_items = len(tif_list)
cpu_count = multiprocessing.cpu_count()
with multiprocessing.Pool(cpu_count) as mpp:
stat_results = list(tqdm(mpp.imap(func, tif_list), total=num_items))
return WorkflowResult(stat_results, parameters)
def data_workflow(stats_results, parameters):
from .dataproc import save_temp
print("3/4: Data analysis")
set_plotting_styles()
data_frame = construct_data_frame(stats_results, parameters)
save_temp(data_frame, parameters)
find_guard_threshold(data_frame, parameters)
data_frame = check_guards(data_frame, parameters)
data_frame = find_outliers(data_frame, parameters)
selected_df = select_on_guards_and_outliers(data_frame, parameters)
selected_df = smooth_rolling_min(selected_df, parameters)
selected_df = smooth_savgol_filter(selected_df, parameters)
extremas_df = find_extremas(selected_df, parameters)
save_data(data_frame, selected_df, extremas_df, parameters)
return WorkflowResult(list(selected_df["file"]), parameters)
def postprocessing_workflow(selected_files, parameters):
print("4/4: Post processing")
file_stems = stem_file_list(selected_files)
cc_rename_pairs = rename_color_coded_images(file_stems, parameters)
cut_sort_pairs = sort_cut_images(file_stems, parameters)
file_pairs = cc_rename_pairs + cut_sort_pairs
for old_path, new_path in tqdm(file_pairs):
old_path.rename(new_path)
remove_cuts_dir(parameters)
def cached_data_workflow(folder):
from .dataproc import load_temp
print("3/4: Data analysis")
set_plotting_styles()
data_frame, parameters = load_temp(folder)
find_guard_threshold(data_frame, parameters)
data_frame = check_guards(data_frame, parameters)
data_frame = find_outliers(data_frame, parameters)
selected_df = select_on_guards_and_outliers(data_frame, parameters)
selected_df = smooth_rolling_min(selected_df, parameters)
selected_df = smooth_savgol_filter(selected_df, parameters)
extremas_df = find_extremas(selected_df, parameters)
save_data(data_frame, selected_df, extremas_df, parameters)
return WorkflowResult(list(selected_df["file"]), parameters)