Browse Source

removing outlier detection, peak finding more robust

master
Holger Frey 6 years ago
parent
commit
2cf6fc0f48
  1. 1
      .gitignore
  2. 90
      mtor/dataproc.py

1
.gitignore vendored

@@ -6,6 +6,7 @@ __pycache__/
# test data
mtor-bilder/
mtor-bilder-2/
oringinal-daten/
# C extensions

90
mtor/dataproc.py

@@ -28,12 +28,10 @@ IMAGE_NAMES = {
2: "2-histogram-of-guard-avarages-filtered.png",
3: "3-histogram-of-guard-avarages-filtered-with-first-minima.png",
4: "4-image-selection-based-on-guard-values.png",
5: "5-selected-values-based-on-guard-values.png",
6: "6-boxplot-of-guarded-values.png",
7: "7-selected-images-outliers-removed.png",
8: "8-selected-images-outliers-removed-rolling-min-applied.png",
9: "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png",
10: "11-finding-minima-and-maxima.png",
5: "5-selected-images-based-on-guard-values.png",
6: "6-selected-images-rolling-min-applied.png",
7: "7-selected-images-rolling-min-savgol-filtered.png",
8: "8-finding-minima-and-maxima.png",
}
@@ -118,8 +116,8 @@ def find_guard_threshold(data_frame, parameters):
guard_data = numpy.histogram(
guard_values, bins=parameters.guard_histogram_bins
)
guard_counts = guard_data[0].astype(numpy.float16)
guard_edges = guard_data[1][1:] # edges enclose the counts
guard_counts = numpy.concatenate([[0], guard_data[0]]).astype(numpy.float16)
guard_edges = guard_data[1] # edges enclose the counts
pyplot.clf()
seaborn.lineplot(x=guard_edges, y=guard_counts)
@@ -235,47 +233,9 @@ def check_guards(data_frame, parameters):
return data_frame
def find_outliers(data_frame, parameters):
def select_on_guards(data_frame, parameters):
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
seaborn.boxplot(data=guarded_df, x=parameters.roi_column)
pyplot.title(f"Boxblot of guarded values")
pyplot.xlabel("Average Intensity [au]")
path = parameters.data_dir / IMAGE_NAMES[6]
pyplot.savefig(str(path))
lower_quartil = guarded_df[parameters.roi_column].quantile(0.25)
upper_quartil = guarded_df[parameters.roi_column].quantile(0.75)
inter_quartil_range = upper_quartil - lower_quartil
parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
data_frame["outlier.ok"] = (
data_frame[parameters.roi_column] < parameters.outlier_upper_limit
)
return data_frame
def select_on_guards_and_outliers(data_frame, parameters):
data_frame["outlier_guards.ok"] = (
data_frame["guards.ok"] & data_frame["outlier.ok"]
)
mask = data_frame["outlier_guards.ok"] == True # noqa: E712
selected_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=parameters.roi_column, data=selected_df
)
pyplot.title(f"Selected Images, outliers removed")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / IMAGE_NAMES[7]
pyplot.savefig(str(path))
return selected_df
@@ -296,11 +256,11 @@ def smooth_rolling_min(selected_df, parameters):
ax = seaborn.scatterplot(
x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df
)
pyplot.title(f"Selected Images, outliers removed, rolling min applied")
pyplot.title(f"Selected Images, rolling min applied")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / IMAGE_NAMES[8]
path = parameters.data_dir / IMAGE_NAMES[6]
pyplot.savefig(str(path))
return selected_df
@@ -319,21 +279,17 @@ def smooth_savgol_filter(selected_df, parameters):
x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df
)
pyplot.title(
(
f"Selected Images, outliers removed,"
f" rolling min applied, Savitzky-Golay filtered"
)
f"Selected Images, rolling min applied, Savitzky-Golay filtered"
)
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = parameters.data_dir / IMAGE_NAMES[9]
path = parameters.data_dir / IMAGE_NAMES[7]
pyplot.savefig(str(path))
return selected_df
def find_extremas(selected_df, parameters):
max_indexes = peakutils.indexes(
selected_df[f"{parameters.roi_name}.savgol"],
thres=parameters.peak_threshold,
@@ -358,10 +314,10 @@ def find_extremas(selected_df, parameters):
min_indexes,
"minima",
)
pyplot.title(f"Finding Minimas")
pyplot.title(f"Finding Extrema")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = parameters.data_dir / IMAGE_NAMES[10]
path = parameters.data_dir / IMAGE_NAMES[8]
pyplot.savefig(str(path))
maximas["is_maxima"] = True
@@ -378,7 +334,7 @@ def save_data(data_frame, selected_df, extremas_df, parameters):
selected_df.to_excel(writer, sheet_name="selected data")
data_frame.to_excel(writer, sheet_name="raw data")
ignore_parameters = {"tif_list", "cuts_dir"}
ignore_parameters = {"tif_list"}
tmp_parameters = {
k: [v] for k, v in parameters.items() if k not in ignore_parameters
}
@@ -462,23 +418,11 @@ def create_report(data_frame, selected_df, extremas_df, parameters):
str(data_dir / IMAGE_NAMES[5]), width=img_width, height=img_height
),
PageBreak(),
Paragraph("Removing Outliers", style_section),
text_and_graph(6, "From the remaining values, outliers are removed."),
text_and_graph(
7,
(
f"From {num_images} images {num_discarded} images were "
f"discarded, leaving {num_selected} selected. The finally "
"selected values are listed in the excel sheet 'selection' "
"in the data file."
),
),
PageBreak(),
Paragraph(
"Experimental: Applying a rolling min calculation", style_section
),
text_and_graph(
8,
6,
(
"Due to the nature of the experiment, unusable images tend "
"to have a higher value as the desiered ones. Therfore a "
@@ -487,11 +431,11 @@ def create_report(data_frame, selected_df, extremas_df, parameters):
),
Paragraph("Experimental: Finding Maxima and Minima", style_section),
text_and_graph(
9,
7,
"To smooth the resulting curve, a Savitzky-Golay filter is used.",
),
text_and_graph(
10,
8,
(
"The most interesting data points should be the maxima and "
"minima of this curve. These are listed in the sheet "

Loading…
Cancel
Save