From 2cf6fc0f48e48857a989ba08e3db3f38c0f54ba3 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Thu, 2 May 2019 14:37:36 +0200 Subject: [PATCH] removing outlier detection, peak finding more robust --- .gitignore | 1 + mtor/dataproc.py | 90 +++++++++--------------------------------------- 2 files changed, 18 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index 73d36d2..adbee79 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ __pycache__/ # test data mtor-bilder/ +mtor-bilder-2/ oringinal-daten/ # C extensions diff --git a/mtor/dataproc.py b/mtor/dataproc.py index aa7e805..ac9c717 100644 --- a/mtor/dataproc.py +++ b/mtor/dataproc.py @@ -28,12 +28,10 @@ IMAGE_NAMES = { 2: "2-histogram-of-guard-avarages-filtered.png", 3: "3-histogram-of-guard-avarages-filtered-with-first-minima.png", 4: "4-image-selection-based-on-guard-values.png", - 5: "5-selected-values-based-on-guard-values.png", - 6: "6-boxplot-of-guarded-values.png", - 7: "7-selected-images-outliers-removed.png", - 8: "8-selected-images-outliers-removed-rolling-min-applied.png", - 9: "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png", - 10: "11-finding-minima-and-maxima.png", + 5: "5-selected-images-based-on-guard-values.png", + 6: "6-selected-images-rolling-min-applied.png", + 7: "7-selected-images-rolling-min-savgol-filtered.png", + 8: "8-finding-minima-and-maxima.png", } @@ -118,8 +116,8 @@ def find_guard_threshold(data_frame, parameters): guard_data = numpy.histogram( guard_values, bins=parameters.guard_histogram_bins ) - guard_counts = guard_data[0].astype(numpy.float16) - guard_edges = guard_data[1][1:] # edges enclose the counts + guard_counts = numpy.concatenate([[0], guard_data[0]]).astype(numpy.float16) + guard_edges = guard_data[1] # edges enclose the counts pyplot.clf() seaborn.lineplot(x=guard_edges, y=guard_counts) @@ -235,47 +233,9 @@ def check_guards(data_frame, parameters): return data_frame -def find_outliers(data_frame, parameters): - +def select_on_guards(data_frame, parameters): mask = data_frame["guards.ok"] == True # noqa: E712 - guarded_df = data_frame[mask].copy() - - pyplot.clf() - seaborn.boxplot(data=guarded_df, x=parameters.roi_column) - pyplot.title(f"Boxblot of guarded values") - pyplot.xlabel("Average Intensity [au]") - path = parameters.data_dir / IMAGE_NAMES[6] - pyplot.savefig(str(path)) - - lower_quartil = guarded_df[parameters.roi_column].quantile(0.25) - upper_quartil = guarded_df[parameters.roi_column].quantile(0.75) - inter_quartil_range = upper_quartil - lower_quartil - parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range - - data_frame["outlier.ok"] = ( - data_frame[parameters.roi_column] < parameters.outlier_upper_limit - ) - return data_frame - - -def select_on_guards_and_outliers(data_frame, parameters): - data_frame["outlier_guards.ok"] = ( - data_frame["guards.ok"] & data_frame["outlier.ok"] - ) - mask = data_frame["outlier_guards.ok"] == True # noqa: E712 selected_df = data_frame[mask].copy() - - pyplot.clf() - ax = seaborn.scatterplot( - x="frame", y=parameters.roi_column, data=selected_df - ) - pyplot.title(f"Selected Images, outliers removed") - pyplot.xlabel("Frame Number [1]") - pyplot.ylabel("Average Intensity [au]") - ax.set_ylim(parameters.charts_y_limit) - path = parameters.data_dir / IMAGE_NAMES[7] - pyplot.savefig(str(path)) - return selected_df @@ -296,11 +256,11 @@ def smooth_rolling_min(selected_df, parameters): ax = seaborn.scatterplot( x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df ) - pyplot.title(f"Selected Images, outliers removed, rolling min applied") + pyplot.title(f"Selected Images, rolling min applied") pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") ax.set_ylim(parameters.charts_y_limit) - path = parameters.data_dir / IMAGE_NAMES[8] + path = parameters.data_dir / IMAGE_NAMES[6] pyplot.savefig(str(path)) return selected_df @@ -319,21 +279,17 @@ def smooth_savgol_filter(selected_df, parameters): x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df ) pyplot.title( - ( - f"Selected Images, outliers removed," - f" rolling min applied, Savitzky-Golay filtered" - ) + f"Selected Images, rolling min applied, Savitzky-Golay filtered" ) pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - path = parameters.data_dir / IMAGE_NAMES[9] + path = parameters.data_dir / IMAGE_NAMES[7] pyplot.savefig(str(path)) return selected_df def find_extremas(selected_df, parameters): - max_indexes = peakutils.indexes( selected_df[f"{parameters.roi_name}.savgol"], thres=parameters.peak_threshold, @@ -358,10 +314,10 @@ def find_extremas(selected_df, parameters): min_indexes, "minima", ) - pyplot.title(f"Finding Minimas") + pyplot.title(f"Finding Extrema") pyplot.xlabel("Frame Number [1]") pyplot.ylabel("Average Intensity [au]") - path = parameters.data_dir / IMAGE_NAMES[10] + path = parameters.data_dir / IMAGE_NAMES[8] pyplot.savefig(str(path)) maximas["is_maxima"] = True @@ -378,7 +334,7 @@ def save_data(data_frame, selected_df, extremas_df, parameters): selected_df.to_excel(writer, sheet_name="selected data") data_frame.to_excel(writer, sheet_name="raw data") - ignore_parameters = {"tif_list", "cuts_dir"} + ignore_parameters = {"tif_list"} tmp_parameters = { k: [v] for k, v in parameters.items() if k not in ignore_parameters } @@ -462,23 +418,11 @@ def create_report(data_frame, selected_df, extremas_df, parameters): str(data_dir / IMAGE_NAMES[5]), width=img_width, height=img_height ), PageBreak(), - Paragraph("Removing Outliers", style_section), - text_and_graph(6, "From the remaining values, outliers are removed."), - text_and_graph( - 7, - ( - f"From {num_images} images {num_discarded} images were " - f"discarded, leaving {num_selected} selected. The finally " - "selected values are listed in the excel sheet 'selection' " - "in the data file." - ), - ), - PageBreak(), Paragraph( "Experimental: Applying a rolling min calculation", style_section ), text_and_graph( - 8, + 6, ( "Due to the nature of the experiment, unusable images tend " "to have a higher value as the desiered ones. Therfore a " @@ -487,11 +431,11 @@ def create_report(data_frame, selected_df, extremas_df, parameters): ), Paragraph("Experimental: Finding Maxima and Minima", style_section), text_and_graph( - 9, + 7, "To smooth the resulting curve, a Savitzky-Golay filter is used.", ), text_and_graph( - 10, + 8, ( "The most interesting data points should be the maxima and " "minima of this curve. These are listed in the sheet "