diff --git a/ideas.md b/ideas.md new file mode 100644 index 0000000..ad679a6 --- /dev/null +++ b/ideas.md @@ -0,0 +1,5 @@ +Further Ideas: + +- color the roi +- create a movie +- automatic use of cached values diff --git a/mtor/__init__.py b/mtor/__init__.py index e710262..4f47080 100644 --- a/mtor/__init__.py +++ b/mtor/__init__.py @@ -1,21 +1,23 @@ import warnings -from .prescan import prescan_workflow -from .imageproc import image_workflow -from .dataproc import data_workflow, cached_workflow -from .filesort import filesort_workflow +from .workflows import ( + prescan_workflow, + image_workflow, + data_workflow, + cached_data_workflow, + postprocessing_workflow, +) warnings.filterwarnings("ignore") def run(): - #settings = prescan_workflow("mtor-bilder", 50, 910, 300, 660) - settings = prescan_workflow("mtor-bilder", 50, 725+150, 300, 725) - stats_results = image_workflow(settings) - selected_files = data_workflow(stats_results, settings) - filesort_workflow(selected_files, settings) + # settings = prescan_workflow("mtor-bilder", 50, 910, 300, 660) + pw = prescan_workflow("mtor-bilder", 50, 725 + 150, 300, 725) + iw = image_workflow(pw.parameters) + dw = data_workflow(iw.data, iw.parameters) + fw = postprocessing_workflow(dw.data, dw.parameters) # noqa: F841 def run_data(): - selected_files, settings = cached_workflow("mtor-bilder") - filesort_workflow(selected_files, settings) + cw = cached_data_workflow("mtor-bilder") diff --git a/mtor/commons.py b/mtor/commons.py new file mode 100644 index 0000000..1ebe1cf --- /dev/null +++ b/mtor/commons.py @@ -0,0 +1,38 @@ +import functools +import numpy +import re + +from collections import namedtuple + +from .luts import FIRE_LUT # noqa: F401 + + +IMG_MAX_INTENSITY = 65535 # 2^16 - 1 + +LABEL_SELECTED = "selected" +LABEL_DISCARDED = "discarded" + +OUTPUT_FOLDER_NAMES = ( + "colored", + "cuts", + f"cuts_{LABEL_DISCARDED}", + f"cuts_{LABEL_SELECTED}", + "data", +) + +RE_DIGITS = re.compile(r"(\d+)") + +ROI_STATISTIC_FUNCTIONS = { + "min": numpy.amin, + "max": numpy.amax, + "1quantile": functools.partial(numpy.percentile, q=25), + "2quantile": functools.partial(numpy.percentile, q=50), + "3quantile": functools.partial(numpy.percentile, q=75), + "average": numpy.average, + "median": numpy.median, + "std": numpy.std, +} + +TifImage = namedtuple("TifImage", ["path", "frame"]) +TifArray = namedtuple("TifArray", ["path", "frame", "data"]) +WorkflowResult = namedtuple("WorkflowResult", ["data", "parameters"]) diff --git a/mtor/dataproc.py b/mtor/dataproc.py index 2bfa602..7624b5c 100644 --- a/mtor/dataproc.py +++ b/mtor/dataproc.py @@ -6,13 +6,12 @@ import pickle import seaborn import matplotlib.pyplot as pyplot -from peakutils.plot import plot as peakplot from scipy.signal import savgol_filter -from .imageproc import STATS_FUNCS, LEFT_GUARD, ROI_NAME, RIGHT_GUARD +from .commons import ROI_STATISTIC_FUNCTIONS -def peakplot_iloc(x, y, ind): +def peakplot_iloc(x, y, ix1, ix1_label="peaks", ix2=None, ix2_label="peaks"): """ Plots the original data with the peaks that were identified @@ -29,15 +28,26 @@ def peakplot_iloc(x, y, ind): ind : array-like Indexes of the identified peaks """ - pyplot.plot(x, y, "--") + pyplot.plot(x, y) + num_items = len(ix1) pyplot.plot( - x.iloc[ind], - y.iloc[ind], + x.iloc[ix1], + y.iloc[ix1], "r+", ms=5, mew=2, - label="{} peaks".format(len(ind)), + label=f"{num_items} {ix1_label}", ) + if ix2 is not None: + num_items = len(ix2) + pyplot.plot( + x.iloc[ix2], + y.iloc[ix2], + "g1", + ms=5, + mew=2, + label=f"{num_items} 
{ix2_label}", + ) pyplot.legend() @@ -56,26 +66,31 @@ def set_plotting_styles(): seaborn.set_context("talk") -def init_pandas_data_frame(): +def init_pandas_data_frame(parameters): columns = ["file", "frame"] - for prefix in (LEFT_GUARD, ROI_NAME, RIGHT_GUARD): - for key in STATS_FUNCS: - columns.append(f"{prefix}.{key}") + groups = ( + parameters.roi_name, + parameters.left_guard_name, + parameters.right_guard_name, + ) + for group in groups: + for func_name in ROI_STATISTIC_FUNCTIONS: + columns.append(f"{group}.{func_name}") return pandas.DataFrame(columns=columns) -def construct_data_frame(stats_results): - data_frame = init_pandas_data_frame() +def construct_data_frame(stats_results, parameters): + data_frame = init_pandas_data_frame(parameters) data_frame = data_frame.append(stats_results, ignore_index=True) return data_frame.sort_values(by=["frame"]).reset_index(drop=True) -def find_guard_value(data_frame, settings): - left_avg = data_frame["left.average"] - right_avg = data_frame["right.average"] - guards_avg = left_avg.append(right_avg, ignore_index=True) +def find_guard_threshold(data_frame, parameters): + left_values = data_frame[parameters.left_guard_column] + right_values = data_frame[parameters.right_guard_column] + guard_values = left_values.append(right_values, ignore_index=True) guard_data = numpy.histogram( - guards_avg, bins=settings.guard_histogram_bins + guard_values, bins=parameters.guard_histogram_bins ) guard_counts = guard_data[0].astype(numpy.float16) guard_edges = guard_data[1][1:] # edges enclose the counts @@ -85,13 +100,15 @@ def find_guard_value(data_frame, settings): pyplot.title("Histogram of Guard Avarages (not filtered)") pyplot.xlabel("Average Intensity [au]") pyplot.ylabel("Number of Observations [1]") - path = settings.data_dir / "1-histogram-of-guard-avarages-not-filtered.png" + path = ( + parameters.data_dir / "1-histogram-of-guard-avarages-not-filtered.png" + ) pyplot.savefig(str(path)) guard_counts_filtered = savgol_filter( guard_counts, - settings.guard_filter_window, - settings.guard_filter_polynom, + parameters.guard_filter_window, + parameters.guard_filter_polynom, ) pyplot.clf() @@ -99,7 +116,7 @@ def find_guard_value(data_frame, settings): pyplot.title("Histogram of Guard Avarages (Savitzky-Golay filter)") pyplot.xlabel("Average Intensity [au]") pyplot.ylabel("Number of Observations [1]") - path = settings.data_dir / "2-histogram-of-guard-avarages-filtered.png" + path = parameters.data_dir / "2-histogram-of-guard-avarages-filtered.png" pyplot.savefig(str(path)) # Finding the first minima after the first peak @@ -114,47 +131,53 @@ def find_guard_value(data_frame, settings): inverted_series = max(guard_counts_filtered) - guard_counts_filtered indexes = peakutils.indexes( inverted_series[first_peak_position:], - min_dist=settings.guards_minima_min_dist, + min_dist=parameters.guards_minima_min_dist, ) # since we shortened the data, we need to add the first peak position first_minima_position = indexes[0] + first_peak_position - settings.guard_max_value = guard_edges[first_minima_position] + parameters.guard_max_value = guard_edges[first_minima_position] pyplot.clf() - peakplot(guard_edges, guard_counts_filtered, [first_minima_position]) + peakplot_iloc( + pandas.Series(guard_edges), pandas.Series(guard_counts_filtered), [first_minima_position], "minima" + ) pyplot.title( ( f"Histogram of Guard Avarages (Savitzky-Golay filter)," - f" first minima at {int(settings.guard_max_value)} au" + f" first minima at {int(parameters.guard_max_value)} au" ) ) 
pyplot.xlabel("Average Intensity [au]") pyplot.ylabel("Number of Observations [1]") path = ( - settings.data_dir + parameters.data_dir / "3-histogram-of-guard-avarages-filtered-with-first-minima.png" ) pyplot.savefig(str(path)) -def check_guards(data_frame, settings): - for what in (LEFT_GUARD, RIGHT_GUARD): +def check_guards(data_frame, parameters): + for what in (parameters.left_guard_name, parameters.right_guard_name): ok_col = f"{what}.ok" data_frame[ok_col] = False - mask = data_frame[f"{what}.average"] < settings.guard_max_value + mask = ( + data_frame[f"{what}.{parameters.guard_stats}"] + < parameters.guard_max_value + ) data_frame.loc[mask, ok_col] = True data_frame["guards.ok"] = ( - data_frame[f"{LEFT_GUARD}.ok"] & data_frame[f"{RIGHT_GUARD}.ok"] + data_frame[f"{parameters.left_guard_name}.ok"] + & data_frame[f"{parameters.right_guard_name}.ok"] ) - mask = data_frame["guards.ok"] == True + mask = data_frame["guards.ok"] == True # noqa: E712 guarded_df = data_frame[mask].copy() pyplot.clf() ax = seaborn.scatterplot( x="frame", - y=f"{ROI_NAME}.average", + y=parameters.roi_column, data=data_frame, hue="guards.ok", hue_order=[True, False], @@ -163,13 +186,13 @@ def check_guards(data_frame, settings): pyplot.title("Selection based on guard values") pyplot.ylabel("Average Intensity [au]") pyplot.ylabel("Frame Number [1]") - path = settings.data_dir / "4-image-selection-based-on-guard-values.png" + path = parameters.data_dir / "4-image-selection-based-on-guard-values.png" pyplot.savefig(str(path)) - settings.charts_y_limit = ax.get_ylim() + parameters.charts_y_limit = ax.get_ylim() pyplot.clf() ax = seaborn.scatterplot( - x="frame", y=f"{ROI_NAME}.average", data=guarded_df + x="frame", y=parameters.roi_column, data=guarded_df ) count_all_images = len(data_frame) count_guarded_images = len(guarded_df) @@ -181,81 +204,80 @@ def check_guards(data_frame, settings): ) pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - ax.set_ylim(settings.charts_y_limit) - path = settings.data_dir / "5-selected-values-based-on-guard-values.png" + ax.set_ylim(parameters.charts_y_limit) + path = parameters.data_dir / "5-selected-values-based-on-guard-values.png" pyplot.savefig(str(path)) - return data_frame -def find_outliers(data_frame, settings): +def find_outliers(data_frame, parameters): - mask = data_frame["guards.ok"] == True + mask = data_frame["guards.ok"] == True # noqa: E712 guarded_df = data_frame[mask].copy() pyplot.clf() - seaborn.boxplot(data=guarded_df, x=f"{ROI_NAME}.average") + seaborn.boxplot(data=guarded_df, x=parameters.roi_column) pyplot.title(f"Boxblot of guarded values") pyplot.xlabel("Average Intensity [au]") - path = settings.data_dir / "6-boxplot-of-guarded-values.png" + path = parameters.data_dir / "6-boxplot-of-guarded-values.png" pyplot.savefig(str(path)) - lower_quartil = guarded_df[f"{ROI_NAME}.average"].quantile(0.25) - upper_quartil = guarded_df[f"{ROI_NAME}.average"].quantile(0.75) + lower_quartil = guarded_df[parameters.roi_column].quantile(0.25) + upper_quartil = guarded_df[parameters.roi_column].quantile(0.75) inter_quartil_range = upper_quartil - lower_quartil - settings.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range + parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range data_frame["outlier.ok"] = ( - data_frame[f"{ROI_NAME}.average"] < settings.outlier_upper_limit + data_frame[parameters.roi_column] < parameters.outlier_upper_limit ) return data_frame -def select_value_on_guards_and_outliers(data_frame, 
settings): +def select_on_guards_and_outliers(data_frame, parameters): data_frame["outlier_guards.ok"] = ( data_frame["guards.ok"] & data_frame["outlier.ok"] ) - mask = data_frame["outlier_guards.ok"] == True + mask = data_frame["outlier_guards.ok"] == True # noqa: E712 selected_df = data_frame[mask].copy() pyplot.clf() ax = seaborn.scatterplot( - x="frame", y=f"{ROI_NAME}.average", data=selected_df + x="frame", y=parameters.roi_column, data=selected_df ) pyplot.title(f"Selected Images, outliers removed") pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - ax.set_ylim(settings.charts_y_limit) - path = settings.data_dir / "7-selected-images-outliers-removed.png" + ax.set_ylim(parameters.charts_y_limit) + path = parameters.data_dir / "7-selected-images-outliers-removed.png" pyplot.savefig(str(path)) return selected_df -def smooth_rolling_min(selected_df, settings): +def smooth_rolling_min(selected_df, parameters): rm = ( - selected_df[f"{ROI_NAME}.average"] - .rolling(settings.rolling_min_window) + selected_df[parameters.roi_column] + .rolling(parameters.rolling_min_window) .min() ) # after a rolling window calculation, the first values will be NaN, # we need to fill them - selected_df[f"{ROI_NAME}.average.rolling.min"] = rm.fillna( + selected_df[f"{parameters.roi_name}.rolling.min"] = rm.fillna( method="backfill" ) pyplot.clf() ax = seaborn.scatterplot( - x="frame", y=f"{ROI_NAME}.average.rolling.min", data=selected_df + x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df ) pyplot.title(f"Selected Images, outliers removed, rolling min applied") pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - ax.set_ylim(settings.charts_y_limit) + ax.set_ylim(parameters.charts_y_limit) path = ( - settings.data_dir + parameters.data_dir / "8-selected-images-outliers-removed-rolling-min-applied.png" ) pyplot.savefig(str(path)) @@ -263,17 +285,17 @@ def smooth_rolling_min(selected_df, settings): return selected_df -def smooth_savgol_filter(selected_df, settings): +def smooth_savgol_filter(selected_df, parameters): filtered = savgol_filter( - selected_df[f"{ROI_NAME}.average.rolling.min"], - settings.savgol_filter_window, - settings.savgol_filter_polynom, + selected_df[f"{parameters.roi_name}.rolling.min"], + parameters.savgol_filter_window, + parameters.savgol_filter_polynom, ) - selected_df[f"{ROI_NAME}.average.savgol"] = filtered + selected_df[f"{parameters.roi_name}.savgol"] = filtered pyplot.clf() seaborn.lineplot( - x="frame", y=f"{ROI_NAME}.average.savgol", data=selected_df + x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df ) pyplot.title( ( @@ -284,7 +306,7 @@ def smooth_savgol_filter(selected_df, settings): pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") path = ( - settings.data_dir + parameters.data_dir / "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png" ) pyplot.savefig(str(path)) @@ -292,46 +314,50 @@ def smooth_savgol_filter(selected_df, settings): return selected_df -def find_extremas(selected_df, settings): +def find_extremas(selected_df, parameters): - indexes = peakutils.indexes( - selected_df[f"{ROI_NAME}.average.savgol"], - thres=settings.peak_threshold, - min_dist=settings.peak_min_distance, + max_indexes = peakutils.indexes( + selected_df[f"{parameters.roi_name}.savgol"], + thres=parameters.peak_threshold, + min_dist=parameters.peak_min_distance, ) - maximas = selected_df.iloc[indexes].copy() + maximas = selected_df.iloc[max_indexes].copy() pyplot.clf() 
peakplot_iloc( selected_df["frame"], - selected_df[f"{ROI_NAME}.average.savgol"], - indexes, + selected_df[f"{parameters.roi_name}.savgol"], + max_indexes, + "maxima", ) pyplot.title(f"Finding Maximas") pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - path = settings.data_dir / "10-finding-maximas.png" + path = parameters.data_dir / "10-finding-maximas.png" pyplot.savefig(str(path)) inverted_series = ( - max(selected_df[f"{ROI_NAME}.average.savgol"]) - - selected_df[f"{ROI_NAME}.average.savgol"] + max(selected_df[f"{parameters.roi_name}.savgol"]) + - selected_df[f"{parameters.roi_name}.savgol"] ) - indexes = peakutils.indexes( - inverted_series, min_dist=settings.peak_min_distance + min_indexes = peakutils.indexes( + inverted_series, min_dist=parameters.peak_min_distance ) - minimas = selected_df.iloc[indexes].copy() + minimas = selected_df.iloc[min_indexes].copy() pyplot.clf() peakplot_iloc( selected_df["frame"], - selected_df[f"{ROI_NAME}.average.savgol"], - indexes, + selected_df[f"{parameters.roi_name}.savgol"], + max_indexes, + "maxima", + min_indexes, + "minima", ) pyplot.title(f"Finding Minimas") pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - path = settings.data_dir / "11-finding-minimas.png" + path = parameters.data_dir / "11-finding-minimas.png" pyplot.savefig(str(path)) maximas["is_maxima"] = True @@ -340,91 +366,38 @@ def find_extremas(selected_df, settings): return extremas_df -def save_data(data_frame, selected_df, extremas_df, settings): - path = settings.data_dir / "numeric-data.xlsx" +def save_data(data_frame, selected_df, extremas_df, parameters): + path = parameters.data_dir / "numeric-data.xlsx" writer = pandas.ExcelWriter(path, engine="xlsxwriter") extremas_df.to_excel(writer, sheet_name="extremas") selected_df.to_excel(writer, sheet_name="selected data") data_frame.to_excel(writer, sheet_name="raw data") - ignore_settings = {"tif_list", "cuts_dir"} - tmp_settings = { - k: [v] for k, v in settings.items() if k not in ignore_settings + ignore_parameters = {"tif_list", "cuts_dir"} + tmp_parameters = { + k: [v] for k, v in parameters.items() if k not in ignore_parameters } - tmp_setings_df = pandas.DataFrame(tmp_settings).T + tmp_setings_df = pandas.DataFrame(tmp_parameters).T tmp_setings_df.to_excel(writer, sheet_name="parameters") writer.save() -def data_workflow(stats_results, settings): - print("3/4: Data analysis") - set_plotting_styles() - data_frame = construct_data_frame(stats_results) - save_temp(data_frame, settings) - find_guard_value(data_frame, settings) - data_frame = check_guards(data_frame, settings) - data_frame = find_outliers(data_frame, settings) - - selected_df = select_value_on_guards_and_outliers(data_frame, settings) - selected_df = smooth_rolling_min(selected_df, settings) - selected_df = smooth_savgol_filter(selected_df, settings) - - extremas_df = find_extremas(selected_df, settings) - - save_data(data_frame, selected_df, extremas_df, settings) - - return list(selected_df["file"]) - - -def save_temp(data_frame, settings): - csv_path = settings.tif_dir / "_data.csv" +def save_temp(data_frame, parameters): + csv_path = parameters.tif_dir / "_data.csv" data_frame.to_csv(csv_path, sep="\t") - xls_path = settings.tif_dir / "_data.xlsx" - data_frame.to_excel(xls_path) - settings_path = settings.tif_dir / "_settings.pickle" - with open(settings_path, "wb") as fh: - pickle.dump(settings, fh) + parameters_path = parameters.tif_dir / "_parameters.pickle" + with open(parameters_path, "wb") as fh: + 
pickle.dump(parameters, fh) def load_temp(folder): dir_path = pathlib.Path(folder) csv_path = dir_path / "_data.csv" data_frame = pandas.read_csv(csv_path, sep="\t", index_col=0) - settings_path = dir_path / "_settings.pickle" - with open(settings_path, "rb") as fh: - settings = pickle.load(fh) - - settings.cut_pad_x = 50 - settings.cut_pad_y = 25 - settings.guard_histogram_bins = 100 - settings.guard_filter_window = 7 - settings.guard_filter_polynom = 3 - settings.guards_minima_min_dist = 10 - settings.rolling_min_window = 5 - settings.savgol_filter_window = 51 - settings.savgol_filter_polynom = 1 - settings.peak_min_distance = 5 - settings.peak_threshold = 0.3 - - return data_frame, settings - - -def cached_workflow(folder): - print("3/4: Data analysis") - set_plotting_styles() - data_frame, settings = load_temp(folder) - find_guard_value(data_frame, settings) - data_frame = check_guards(data_frame, settings) - data_frame = find_outliers(data_frame, settings) - - selected_df = select_value_on_guards_and_outliers(data_frame, settings) - selected_df = smooth_rolling_min(selected_df, settings) - selected_df = smooth_savgol_filter(selected_df, settings) - - extremas_df = find_extremas(selected_df, settings) - - save_data(data_frame, selected_df, extremas_df, settings) - - return list(selected_df["file"]), settings + parameters_path = dir_path / "_parameters.pickle" + with open(parameters_path, "rb") as fh: + parameters = pickle.load(fh) + + return data_frame, parameters diff --git a/mtor/imageproc.py b/mtor/imageproc.py index 10be957..361147e 100644 --- a/mtor/imageproc.py +++ b/mtor/imageproc.py @@ -1,32 +1,13 @@ -from collections import namedtuple -from functools import partial -from multiprocessing import Pool from PIL import Image -from tqdm import tqdm import numpy -from .luts import FIRE_LUT - - -STATS_FUNCS = { - "min": numpy.amin, - "max": numpy.amax, - "1quantile": partial(numpy.percentile, q=25), - "2quantile": partial(numpy.percentile, q=50), - "3quantile": partial(numpy.percentile, q=75), - "average": numpy.average, - "median": numpy.median, - "std": numpy.std, -} - -LEFT_GUARD = "left" -ROI_NAME = "roi" -RIGHT_GUARD = "right" - -ROI_BORDER_INTENSITY = 2**15 - -TifArray = namedtuple("TifArray", ["path", "frame", "data"]) +from .commons import ( + TifArray, + IMG_MAX_INTENSITY, + FIRE_LUT, + ROI_STATISTIC_FUNCTIONS, +) def open_as_array(tif_image): @@ -38,19 +19,19 @@ def open_as_array(tif_image): ) -def scale_and_colorize(tif_array, settings): - adjusted_array = (tif_array.data - settings.offset) // settings.scale +def scale_and_colorize(tif_array, parameters): + adjusted_array = (tif_array.data - parameters.offset) // parameters.scale # paint roi - top = settings.roi_top - bottom = settings.roi_bottom - right = settings.roi_right - left = settings.roi_left - adjusted_array[top, left:right] = ROI_BORDER_INTENSITY - adjusted_array[bottom, left:right] = ROI_BORDER_INTENSITY - adjusted_array[top:bottom, left] = ROI_BORDER_INTENSITY - adjusted_array[top:bottom, right] = ROI_BORDER_INTENSITY + top = parameters.roi_top + bottom = parameters.roi_bottom + right = parameters.roi_right + left = parameters.roi_left + adjusted_array[top, left:right] = IMG_MAX_INTENSITY + adjusted_array[bottom, left:right] = IMG_MAX_INTENSITY + adjusted_array[top:bottom, left] = IMG_MAX_INTENSITY + adjusted_array[top:bottom, right] = IMG_MAX_INTENSITY adjusted = Image.fromarray(adjusted_array) converted = adjusted.convert(mode="L").convert(mode="P") @@ -58,21 +39,21 @@ def scale_and_colorize(tif_array, 
settings): return converted.convert("RGB") -def safe_color_coded(tif_array, settings): - png_image = scale_and_colorize(tif_array, settings) - png_path = settings.colored_dir / (tif_array.path.stem + ".jpg") +def safe_color_coded(tif_array, parameters): + png_image = scale_and_colorize(tif_array, parameters) + png_path = parameters.colored_dir / (tif_array.path.stem + ".jpg") with png_path.open("wb") as outhandle: png_image.save(outhandle) del png_image -def safe_cut(tif_array, settings): - cut_path = settings.cuts_dir / (tif_array.path.stem + "_cut.tif") +def safe_cut(tif_array, parameters): + cut_path = parameters.cuts_dir / tif_array.path.name - top = settings.cut_top - bottom = settings.cut_bottom - right = settings.cut_right - left = settings.cut_left + top = parameters.cut_top + bottom = parameters.cut_bottom + right = parameters.cut_right + left = parameters.cut_left with cut_path.open("wb") as outhandle: cut_array = tif_array.data[top:bottom, left:right] @@ -81,42 +62,35 @@ def safe_cut(tif_array, settings): del cut_img -def get_roi(tif_array, settings): - top = settings.roi_top - bottom = settings.roi_bottom - right = settings.roi_right - left = settings.roi_left +def get_roi(tif_array, parameters): + top = parameters.roi_top + bottom = parameters.roi_bottom + right = parameters.roi_right + left = parameters.roi_left return tif_array.data[top:bottom, left:right] -def analyse_roi(tif_array, settings): - data = get_roi(tif_array, settings) +def analyse_roi(tif_array, parameters): + data = get_roi(tif_array, parameters) results = {} - results.update(get_stats(data[:, 1], LEFT_GUARD)) - results.update(get_stats(data[:, 1:-1], ROI_NAME)) - results.update(get_stats(data[:, -1], RIGHT_GUARD)) + results.update(get_stats(data[:, 1], parameters.left_guard_name)) + results.update(get_stats(data[:, 1:-1], parameters.roi_name)) + results.update(get_stats(data[:, -1], parameters.right_guard_name)) return results def get_stats(array, prefix): - return {f"{prefix}.{k}": f(array) for k, f in STATS_FUNCS.items()} + return { + f"{prefix}.{name}": func(array) + for name, func in ROI_STATISTIC_FUNCTIONS.items() + } -def process_one_tif(tif_image, settings): +def process_one_tif(tif_image, parameters): tif_array = open_as_array(tif_image) - safe_color_coded(tif_array, settings) - safe_cut(tif_array, settings) - stats_result = analyse_roi(tif_array, settings) + safe_color_coded(tif_array, parameters) + safe_cut(tif_array, parameters) + stats_result = analyse_roi(tif_array, parameters) stats_result["file"] = tif_image.path.name stats_result["frame"] = tif_image.frame return stats_result - - -def image_workflow(settings): - print("2/4: Image analysis and conversion") - func = partial(process_one_tif, settings=settings) - tif_list = settings.tif_list - num_items = len(tif_list) - with Pool(4) as p: - stat_results = list(tqdm(p.imap(func, tif_list), total=num_items)) - return stat_results diff --git a/mtor/parameters.py b/mtor/parameters.py new file mode 100644 index 0000000..06c36de --- /dev/null +++ b/mtor/parameters.py @@ -0,0 +1,179 @@ +import pathlib + +from PIL import Image + +from .commons import RE_DIGITS, OUTPUT_FOLDER_NAMES, TifImage + + +class Parameters(dict): + """ namespacing the parameters for a analysis with nice access + + the following parameters are used: + + boost: linear factor for brightness on color coded images + charts_y_limit: common scale for the y axis in graphs + colored_dir: path to the directory to hold color coded images + cut_bottom: bottom edge for croping an image + 
cut_left: left edge for croping an image + cut_pad_x: pixels added to left or right edge to crop an image + cut_pad_y: pixels added to top or bottom edge to crop an image + cut_right: right edge for croping an image + cut_top: left edge for croping an image + cuts_discarded_dir: path to the directory of discarded croped images + cuts_selected_dir: path to the directory of selected croped images + data_dir: path to the directory for graphs and numeric data + folder: name of the directory that holds the tif images + guard_filter_polynom: polynomal scalar for smoothing the guards histogram + guard_filter_window: window scalar for smoothing the guards histogram + guard_histogram_bins: number of bins when constructing guards histogram + guard_max_value: calculated threshold for the gurards + guard_stats: which values to use for caclulation of guards + guards_minima_min_dist: min distance between minimas in guards histogram + image_height: height of an image + image_width: width of an image + intensity_max: highest intensity found in all images + intensity_min: lowest intensity found in all images + left_guard_column: name of the most used left guard value column + left_guard_name: left guard value group name + offset: calculated the lowest intensity to scale by + outlier_upper_limit: calculated threshold value for outliers + peak_min_distance: min distance for peaks in the final results + peak_threshold: threshold peak height in the final results + right_guard_column: name of the most used left guard value column + right_guard_name: right guard value group name + roi_bottom: bottom edge of the ROI + roi_column: + roi_left: bottom left of the ROI + roi_name: roi value group name + roi_right: bottom right of the ROI + roi_stats: which values to use for caclulation of roi + roi_top: top edge of the ROI + rolling_min_window: size of the window for the rolling min calculation + savgol_filter_polynom: polynomal scalar for smoothing the final results + savgol_filter_window: window scalar for smoothing the final results + scale: calculated factor to boost image intensities + tif_dir: path to the directory with the original tif images + + """ + + def __init__(self, folder, top, right, bottom, left, **kargs): + super().__init__(self) + + self.folder = folder + + # defaults + self.boost = 10 + self.cut_pad_x = 50 + self.cut_pad_y = 25 + self.guard_histogram_bins = 100 + self.guard_filter_window = 7 + self.guard_filter_polynom = 3 + self.guards_minima_min_dist = 10 + self.rolling_min_window = 5 + self.savgol_filter_window = 51 + self.savgol_filter_polynom = 1 + self.peak_min_distance = 5 + self.peak_threshold = 0.3 + + # labels for data items + self.roi_name = "roi" + self.roi_stats = "average" + self.roi_column = f"{self.roi_name}.{self.roi_stats}" + self.left_guard_name = "left" + self.right_guard_name = "right" + self.guard_stats = "average" + self.left_guard_column = f"{self.left_guard_name}.{self.guard_stats}" + self.right_guard_column = f"{self.right_guard_name}.{self.guard_stats}" + + # arbitrary keyword arguments, updating defaults if necessary + self.update(kargs) + + # create list of images + self.tif_dir = pathlib.Path(folder) + if not self.tif_dir.is_dir(): + raise IOError(f"Not a directory: {self.folder}") + self.tif_list = self.scan_image_dir() + + # set the names of output directories + for name in OUTPUT_FOLDER_NAMES: + sub_path = self.tif_dir / name + self[f"{name}_dir"] = sub_path + + # get image dimensions + width, height = self.get_size_of_one_image() + self.image_width = width + 
self.image_height = height + + self.set_roi_and_cut_size(top, right, bottom, left) + + def __getattr__(self, key): + """ return a parameters value as instance property """ + return self[key] + + def __setattr__(self, key, value): + """ set a parameters value via instance property """ + self[key] = value + + def __getstate__(self): + # Copy the object's state from self.__dict__ which contains + # all our instance attributes. Always use the dict.copy() + # method to avoid modifying the original state. + return self.__dict__.copy() + + def __setstate__(self, state): + # Restore instance attributes (i.e., filename and lineno). + self.__dict__.update(state) + + def scan_image_dir(self): + """ scan a folder for tif images """ + visible = ( + item + for item in self.tif_dir.iterdir() + if not item.stem.startswith(".") + ) + tifs = (item for item in visible if item.suffix == ".tif") + results = [] + for path in tifs: + try: + match = RE_DIGITS.search(path.name).group() + except AttributeError as ex: + raise ValueError( + f"no sequence number found in {path.name}" + ) from ex + results.append(TifImage(path=path, frame=int(match))) + return results + + def get_size_of_one_image(self): + """ get the dimension of one Image + + the dimensions should be the same on all images. this is ensured in + an other workflow. + """ + image = self.tif_list[0] + with open(image.path, "rb") as filehandle: + image = Image.open(filehandle) + size = image.size + return size + + def set_roi_and_cut_size(self, top, right, bottom, left): + if right < left: + left, right = right, left + if top > bottom: + top, bottom = bottom, top + if ( + top < 0 + or left < 0 + or bottom > self.image_height + or right > self.image_width + ): + raise ValueError("ROI out of bounce") + + self.roi_top = top + self.roi_bottom = bottom + self.roi_right = right + self.roi_left = left + + self.cut_top = min(self.image_height, top - self.cut_pad_y) + self.cut_bottom = max(0, bottom + self.cut_pad_y) + self.cut_right = min(self.image_width, right + self.cut_pad_x) + self.cut_left = max(0, left - self.cut_pad_x) diff --git a/mtor/postproc.py b/mtor/postproc.py new file mode 100644 index 0000000..d704915 --- /dev/null +++ b/mtor/postproc.py @@ -0,0 +1,40 @@ +import pathlib + +from .commons import LABEL_SELECTED, LABEL_DISCARDED + + +def stem_file_list(selected_files): + return {pathlib.Path(filename).stem for filename in selected_files} + + +def rename_color_coded_images(file_stems, parameters): + rename_pairs = [] + for path in parameters.colored_dir.iterdir(): + if not path.stem.startswith("."): + label = ( + LABEL_SELECTED if path.stem in file_stems else LABEL_DISCARDED + ) + new_name = path.with_name(f"{path.stem}_{label}{path.suffix}") + rename_pairs.append((path, new_name)) + return rename_pairs + + +def sort_cut_images(file_stems, parameters): + sort_pairs = [] + for path in parameters.cuts_dir.iterdir(): + if not path.stem.startswith("."): + label = ( + LABEL_SELECTED if path.stem in file_stems else LABEL_DISCARDED + ) + new_path = ( + parameters[f"cuts_{label}_dir"] + / f"{path.stem}_cut_{label}{path.suffix}" + ) + sort_pairs.append((path, new_path)) + return sort_pairs + + +def remove_cuts_dir(parameters): + for item in parameters["cuts_dir"].iterdir(): + item.unlink() + parameters["cuts_dir"].rmdir() diff --git a/mtor/prescan.py b/mtor/prescan.py index 2d92f8c..149b27a 100644 --- a/mtor/prescan.py +++ b/mtor/prescan.py @@ -1,163 +1,36 @@ -import re - -from collections import namedtuple -from pathlib import Path from PIL import Image from 
tqdm import tqdm +from .commons import OUTPUT_FOLDER_NAMES, IMG_MAX_INTENSITY -RE_DIGITS = re.compile(r"(\d+)") - -LABEL_SELECTED = "selected" -LABEL_DISCARDED = "discarded" - - -TifImage = namedtuple("TifImage", ["path", "frame"]) - - -class Settings(dict): - """ namespacing the constant values for a analysis with nice access """ - - def __getattr__(self, key): - """ return a settings value as instance property """ - return self[key] - - def __setattr__(self, key, value): - """ set a settings value via instance property """ - self[key] = value - - def __getstate__(self): - # Copy the object's state from self.__dict__ which contains - # all our instance attributes. Always use the dict.copy() - # method to avoid modifying the original state. - return self.__dict__.copy() - - def __setstate__(self, state): - # Restore instance attributes (i.e., filename and lineno). - self.__dict__.update(state) - - -def scan_image_dir(settings): - """ scan a folder for tif images """ - tif_dir = Path(settings.folder) - if not tif_dir.is_dir(): - raise IOError(f"Not a directory: {settings.folder}") - visible = (i for i in tif_dir.iterdir() if not i.stem.startswith(".")) - tifs = (item for item in visible if item.suffix == ".tif") - results = [] - for path in tifs: - try: - match = RE_DIGITS.search(path.name).group() - except AttributeError as ex: - raise ValueError( - f"no sequence number found in {path.name}" - ) from ex - results.append(TifImage(path=path, frame=int(match))) - settings.tif_dir = tif_dir - settings.tif_list = results - return settings - - -def prepare_output_dirs(settings): - dir_names = ( - "colored", - "cuts", - f"cuts_{LABEL_DISCARDED}", - f"cuts_{LABEL_SELECTED}", - "data", - ) - for subdir in dir_names: - sub_path = settings.tif_dir / subdir - sub_path.mkdir(exist_ok=True) - for item in sub_path.iterdir(): - item.unlink() - settings[f"{subdir}_dir"] = sub_path - return settings - -def pre_scan_tifs(settings): +def scan_tifs(parameters): """ scan a sequence of tif images for common values """ - lowest = 2 ** 16 + lowest = IMG_MAX_INTENSITY highest = 0 - width = None - height = None - - print("1/4: scanning tifs for common autocontrast values") - for tif in tqdm(settings.tif_list): + for tif in tqdm(parameters.tif_list): with tif.path.open("rb") as filehandle: image = Image.open(filehandle) mi, ma = image.getextrema() lowest = min(lowest, mi) highest = max(highest, ma) - if width is None: - width, height = image.size - elif width != image.width or height != image.height: + if ( + image.width != parameters.image_width + or image.height != parameters.image_height + ): raise ValueError("Images have different sizes") - print(f"intensity range: {lowest} to {highest}") - print(f"image size: {width} x {height} pixels") - - settings.intensity_min = lowest - settings.intensity_max = highest - settings.image_width = width - settings.image_height = height - - return settings - - -def check_roi_size(settings, top, right, bottom, left): - if ( - top > settings.image_height - or bottom > settings.image_height - or right > settings.image_width - or left > settings.image_width - ): - raise ValueError("ROI out of bounce") - if right < left: - left, right = right, left - if bottom < top: - top, bottom = bottom, top - - settings.roi_top = top - settings.roi_bottom = bottom - settings.roi_right = right - settings.roi_left = left - - settings.cut_top = min(settings.image_height, top - settings.cut_pad_y) - settings.cut_bottom = max(0, bottom + settings.cut_pad_y) - settings.cut_right = 
min(settings.image_width, right + settings.cut_pad_x) - settings.cut_left = max(0, left - settings.cut_pad_x) + parameters.intensity_min = lowest + parameters.intensity_max = highest - return settings + return parameters -def prescan_workflow(folder, top, right, bottom, left, boost=10, **kargs): - settings = Settings() - settings.folder = folder - settings.boost = boost - - settings.cut_pad_x = 50 - settings.cut_pad_y = 25 - settings.guard_histogram_bins = 100 - settings.guard_filter_window = 7 - settings.guard_filter_polynom = 3 - settings.guards_minima_min_dist = 10 - settings.rolling_min_window = 5 - settings.savgol_filter_window = 51 - settings.savgol_filter_polynom = 1 - settings.peak_min_distance = 5 - settings.peak_threshold = 0.3 - - settings.update(kargs) - - settings = scan_image_dir(settings) - settings = prepare_output_dirs(settings) - settings = pre_scan_tifs(settings) - settings = check_roi_size(settings, top, right, bottom, left) - - settings.offset = settings.intensity_min - max_offset = settings.intensity_max - settings.intensity_min - settings.scale = max_offset / 255 / boost - - return settings +def prepare_output_dirs(parameters): + for name in OUTPUT_FOLDER_NAMES: + sub_path = parameters[f"{name}_dir"] + sub_path.mkdir(exist_ok=True) + for item in sub_path.iterdir(): + item.unlink() + return parameters diff --git a/mtor/workflows.py b/mtor/workflows.py new file mode 100644 index 0000000..d024c5a --- /dev/null +++ b/mtor/workflows.py @@ -0,0 +1,108 @@ +import multiprocessing +import functools + +from tqdm import tqdm + +from .commons import WorkflowResult +from .parameters import Parameters +from .prescan import scan_tifs, prepare_output_dirs +from .imageproc import process_one_tif +from .dataproc import ( + set_plotting_styles, + construct_data_frame, + find_guard_threshold, + check_guards, + find_outliers, + select_on_guards_and_outliers, + smooth_rolling_min, + smooth_savgol_filter, + find_extremas, + save_data, +) +from .postproc import ( + stem_file_list, + rename_color_coded_images, + sort_cut_images, + remove_cuts_dir, +) + + +def prescan_workflow(folder, top, right, bottom, left, **kargs): + print("1/4: scanning tifs for common autocontrast values") + + parameters = Parameters(folder, top, right, bottom, left) + parameters = scan_tifs(parameters) + + parameters.offset = parameters.intensity_min + max_offset = parameters.intensity_max - parameters.intensity_min + parameters.scale = max_offset / 255 / parameters.boost + + prepare_output_dirs(parameters) + + return WorkflowResult(None, parameters) + + +def image_workflow(parameters): + print("2/4: Image analysis and conversion") + func = functools.partial(process_one_tif, parameters=parameters) + tif_list = parameters.tif_list + num_items = len(tif_list) + cpu_count = multiprocessing.cpu_count() + with multiprocessing.Pool(cpu_count) as mpp: + stat_results = list(tqdm(mpp.imap(func, tif_list), total=num_items)) + return WorkflowResult(stat_results, parameters) + + +def data_workflow(stats_results, parameters): + from .dataproc import save_temp + + print("3/4: Data analysis") + set_plotting_styles() + data_frame = construct_data_frame(stats_results, parameters) + save_temp(data_frame, parameters) + + find_guard_threshold(data_frame, parameters) + data_frame = check_guards(data_frame, parameters) + data_frame = find_outliers(data_frame, parameters) + + selected_df = select_on_guards_and_outliers(data_frame, parameters) + selected_df = smooth_rolling_min(selected_df, parameters) + selected_df = 
smooth_savgol_filter(selected_df, parameters) + + extremas_df = find_extremas(selected_df, parameters) + + save_data(data_frame, selected_df, extremas_df, parameters) + + return WorkflowResult(list(selected_df["file"]), parameters) + + +def postprocessing_workflow(selected_files, parameters): + print("4/4: Post processing") + file_stems = stem_file_list(selected_files) + cc_rename_pairs = rename_color_coded_images(file_stems, parameters) + cut_sort_pairs = sort_cut_images(file_stems, parameters) + file_pairs = cc_rename_pairs + cut_sort_pairs + for old_path, new_path in tqdm(file_pairs): + old_path.rename(new_path) + remove_cuts_dir(parameters) + + +def cached_data_workflow(folder): + from .dataproc import load_temp + + print("3/4: Data analysis") + set_plotting_styles() + data_frame, parameters = load_temp(folder) + find_guard_threshold(data_frame, parameters) + data_frame = check_guards(data_frame, parameters) + data_frame = find_outliers(data_frame, parameters) + + selected_df = select_on_guards_and_outliers(data_frame, parameters) + selected_df = smooth_rolling_min(selected_df, parameters) + selected_df = smooth_savgol_filter(selected_df, parameters) + + extremas_df = find_extremas(selected_df, parameters) + + save_data(data_frame, selected_df, extremas_df, parameters) + + return WorkflowResult(list(selected_df["file"]), parameters)
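
Notes on the refactored pipeline (illustrative sketches, not part of the patch above). Every workflow now returns a WorkflowResult namedtuple of (data, parameters), so stages are chained by handing one stage's result to the next; run() and run_data() in mtor/__init__.py boil down to the following, shown with the same example folder and ROI coordinates used there:

from mtor.workflows import (
    prescan_workflow,
    image_workflow,
    data_workflow,
    postprocessing_workflow,
    cached_data_workflow,
)

# full pipeline: prescan -> per-image processing -> data analysis -> post processing
pw = prescan_workflow("mtor-bilder", 50, 725 + 150, 300, 725)  # folder, top, right, bottom, left
iw = image_workflow(pw.parameters)               # iw.data: per-frame ROI statistics
dw = data_workflow(iw.data, iw.parameters)       # dw.data: file names of the selected frames
postprocessing_workflow(dw.data, dw.parameters)  # renames/sorts the output images

# repeat only the data analysis, using the csv/pickle cache written by save_temp()
cw = cached_data_workflow("mtor-bilder")
print(f"{len(cw.data)} images selected")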
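
Parameters is a dict subclass whose __getattr__/__setattr__ forward to item access, so parameters.scale and parameters["scale"] are the same value; that is what lets save_data() iterate over the items and save_temp()/load_temp() pickle the whole object. A stripped-down sketch of the pattern, with the image-specific setup from Parameters.__init__ omitted; the KeyError-to-AttributeError translation is an addition here so that pickle's getattr probing stays happy (the real class achieves the same via __getstate__/__setstate__):

import pickle


class AttrDict(dict):
    """Dict with attribute-style access, like mtor.parameters.Parameters."""

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError as ex:  # missing keys must surface as AttributeError
            raise AttributeError(key) from ex

    def __setattr__(self, key, value):
        self[key] = value


p = AttrDict(boost=10)
p.guard_histogram_bins = 100  # attribute writes land in the dict
assert p["guard_histogram_bins"] == 100 and p.boost == 10

# the state lives in the dict itself, so a pickle round trip preserves it
restored = pickle.loads(pickle.dumps(p))
assert restored.boost == 10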
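
image_workflow() fans the per-image work out over a multiprocessing.Pool sized to cpu_count(), wrapping Pool.imap in tqdm so the progress bar advances as results arrive. The same pattern in isolation, with square() standing in for process_one_tif:

import multiprocessing

from tqdm import tqdm


def square(n):  # stands in for process_one_tif
    return n * n


if __name__ == "__main__":
    items = list(range(100))
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        results = list(tqdm(pool.imap(square, items), total=len(items)))
    print(sum(results))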
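
find_guard_threshold() derives guard_max_value from the data itself: histogram the left/right guard averages, smooth the counts with a Savitzky-Golay filter, locate the first peak, then invert the smoothed curve and let peakutils find the first minimum after that peak; the matching bin edge becomes the threshold. A condensed sketch on synthetic data, using the default bin/window values from Parameters (the hunk above does not show how the first peak is located, so argmax of the smoothed counts stands in for that step here):

import numpy
import peakutils
from scipy.signal import savgol_filter

rng = numpy.random.default_rng(0)
# synthetic guard averages: many dark guard pixels plus a brighter minority
guard_values = numpy.concatenate(
    [rng.normal(200, 40, 800), rng.normal(500, 100, 400)]
)

counts, edges = numpy.histogram(guard_values, bins=100)  # guard_histogram_bins
counts = counts.astype(float)
edges = edges[1:]  # the right edges enclose the counts

smoothed = savgol_filter(counts, 7, 3)  # guard_filter_window, guard_filter_polynom

first_peak = int(numpy.argmax(smoothed))
inverted = max(smoothed) - smoothed  # minima become peaks
minima = peakutils.indexes(inverted[first_peak:], min_dist=10)  # guards_minima_min_dist
guard_max_value = edges[minima[0] + first_peak]
print(f"guard threshold at {guard_max_value:.0f} au")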
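
find_outliers() applies the standard boxplot rule to the guarded ROI averages: anything above the third quartile plus 1.5 times the inter-quartile range counts as an outlier (outlier.ok == False). In pandas terms, roughly:

import pandas

roi_average = pandas.Series([410, 395, 400, 405, 2100, 398, 402, 399])  # made-up values

q1 = roi_average.quantile(0.25)
q3 = roi_average.quantile(0.75)
outlier_upper_limit = q3 + 1.5 * (q3 - q1)

outlier_ok = roi_average < outlier_upper_limit  # same rule as data_frame["outlier.ok"]
print(roi_average[~outlier_ok])                 # only the 2100 frame is flagged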
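
The selected ROI series is then smoothed twice, first with a rolling minimum (back-filling the leading NaNs) and then with a Savitzky-Golay filter, and peakutils.indexes() runs twice on the result: directly for maxima and on the inverted series for minima. A compact sketch on a synthetic oscillation, using the default window and threshold values from Parameters:

import numpy
import pandas
import peakutils
from scipy.signal import savgol_filter

frames = numpy.arange(300)
rng = numpy.random.default_rng(1)
roi_average = pandas.Series(100 + 30 * numpy.sin(frames / 15) + rng.normal(0, 3, 300))

rolling_min = roi_average.rolling(5).min().fillna(method="backfill")  # rolling_min_window
smoothed = savgol_filter(rolling_min, 51, 1)  # savgol_filter_window, savgol_filter_polynom

max_indexes = peakutils.indexes(smoothed, thres=0.3, min_dist=5)  # peak_threshold, peak_min_distance
min_indexes = peakutils.indexes(smoothed.max() - smoothed, min_dist=5)
print("maxima at frames:", list(frames[max_indexes]))
print("minima at frames:", list(frames[min_indexes]))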