You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
521 lines
16 KiB
521 lines
16 KiB
import numpy |
import pandas |
import pathlib |
import peakutils |
import pickle |
import seaborn |
import matplotlib.pyplot as pyplot |
from reportlab.platypus import ( |
SimpleDocTemplate, |
Paragraph, |
PageBreak, |
Spacer, |
KeepTogether, |
Image, |
) |
from reportlab.lib.pagesizes import A4 |
from reportlab.lib.styles import getSampleStyleSheet |
from reportlab.lib.units import mm |
from scipy.signal import savgol_filter |
from .commons import ROI_STATISTIC_FUNCTIONS |
# File names of the charts written during the analysis, keyed by the number
# the plotting functions and ``create_report`` use to look them up.
IMAGE_NAMES = {
    1: "1-histogram-of-guard-avarages-not-filtered.png",
    2: "2-histogram-of-guard-avarages-filtered.png",
    3: "3-histogram-of-guard-avarages-filtered-with-first-minima.png",
    4: "4-image-selection-based-on-guard-values.png",
    5: "5-selected-values-based-on-guard-values.png",
    6: "6-boxplot-of-guarded-values.png",
    7: "7-selected-images-outliers-removed.png",
    8: "8-selected-images-outliers-removed-rolling-min-applied.png",
    9: "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png",
    # NOTE(review): the key is 10 but the file name is prefixed with "11" -
    # confirm whether this is intentional before renaming anything.
    10: "11-finding-minima-and-maxima.png",
}
def peakplot_iloc(x, y, ix1, ix1_label="peaks", ix2=None, ix2_label="peaks"):
    """
    Plot a curve and mark up to two sets of positions on it.

    Stand-in for the plot helper shipped with peakutils: the marked points
    are looked up by position (``.iloc``) instead of by index label, so the
    markers are still placed correctly when rows were removed from the data.

    Parameters
    ----------
    x : pandas.Series
        Data on the x-axis
    y : pandas.Series
        Data on the y-axis
    ix1 : array-like
        Positions of the first set of points, drawn as red "+" markers
    ix1_label : str
        Legend text for the first set, prefixed with the number of points
    ix2 : array-like, optional
        Positions of a second set of points, drawn as green markers
    ix2_label : str
        Legend text for the second set, prefixed with the number of points
    """

    def mark(positions, marker, label):
        # one marker series, labelled with the number of marked points
        pyplot.plot(
            x.iloc[positions],
            y.iloc[positions],
            marker,
            ms=5,
            mew=2,
            label=f"{len(positions)} {label}",
        )

    pyplot.plot(x, y)
    mark(ix1, "r+", ix1_label)
    if ix2 is not None:
        mark(ix2, "g1", ix2_label)
    pyplot.legend()
def set_plotting_styles():
    """Apply the global seaborn settings used for all charts of the module."""
    tick_style_overrides = {
        "legend.frameon": True,
        "xtick.direction": "in",
        "ytick.direction": "in",
        "axes.linewidth": 2,
    }
    figure_settings = {"figure.figsize": (12, 9)}

    # The calls below modify global state and are order dependent, the
    # sequence must be kept as it is.
    seaborn.set_style("darkgrid")
    seaborn.set_style("ticks", rc=tick_style_overrides)
    # NOTE(review): seaborn.set is documented as an alias of set_theme, whose
    # default style is "darkgrid" - verify whether the "ticks" overrides
    # above survive this call.
    seaborn.set(rc=figure_settings)
    seaborn.set_context("talk")
def init_pandas_data_frame(parameters):
    """
    Create an empty data frame with the columns expected for the statistics.

    The columns are "file" and "frame", followed by one
    ``"<group>.<function name>"`` column for every combination of the ROI,
    left guard and right guard names with the entries of
    ``ROI_STATISTIC_FUNCTIONS``.

    Parameters
    ----------
    parameters : object
        Provides ``roi_name``, ``left_guard_name`` and ``right_guard_name``

    Returns
    -------
    pandas.DataFrame
        A data frame without rows
    """
    group_names = (
        parameters.roi_name,
        parameters.left_guard_name,
        parameters.right_guard_name,
    )
    statistic_columns = [
        f"{group_name}.{statistic}"
        for group_name in group_names
        for statistic in ROI_STATISTIC_FUNCTIONS
    ]
    return pandas.DataFrame(columns=["file", "frame", *statistic_columns])
def construct_data_frame(stats_results, parameters):
    """
    Build the data frame of all statistics, sorted by frame number.

    Parameters
    ----------
    stats_results : list, dict, pandas.Series or pandas.DataFrame
        The collected statistics.
        NOTE(review): presumably a list with one mapping per image, keyed by
        the column names of ``init_pandas_data_frame`` - verify against the
        caller.
    parameters : object
        Passed on to ``init_pandas_data_frame``

    Returns
    -------
    pandas.DataFrame
        The template columns first, followed by any additional columns found
        in the results; rows sorted by "frame" with a fresh index.
    """
    template = init_pandas_data_frame(parameters)

    # DataFrame.append was removed in pandas 2.0.  It accepted a single
    # row (dict / Series) as well, those are wrapped to stay compatible.
    if isinstance(stats_results, (dict, pandas.Series)):
        stats_results = [stats_results]
    results = pandas.DataFrame(stats_results)

    # same column layout the former append produced: template columns first,
    # unknown columns of the results afterwards
    extra_columns = [
        column for column in results.columns if column not in template.columns
    ]
    data_frame = results.reindex(columns=[*template.columns, *extra_columns])
    return data_frame.sort_values(by=["frame"]).reset_index(drop=True)
def find_guard_threshold(data_frame, parameters):
    """
    Derive the guard threshold from the histogram of both guard columns.

    The values of the left and the right guard column are combined into one
    histogram, which is smoothed with a Savitzky-Golay filter.  The upper bin
    edge at the first minimum after the first peak of the smoothed histogram
    is stored as ``parameters.guard_max_value``.  Three charts
    (``IMAGE_NAMES`` entries 1 to 3) are saved to ``parameters.data_dir``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain the columns named by ``parameters.left_guard_column``
        and ``parameters.right_guard_column``
    parameters : object
        Attributes read: ``left_guard_column``, ``right_guard_column``,
        ``guard_histogram_bins``, ``guard_filter_window``,
        ``guard_filter_polynom``, ``guards_minima_min_dist`` and
        ``data_dir``.  The attribute ``guard_max_value`` is set as a side
        effect; the function itself returns nothing.

    Raises
    ------
    IndexError
        If no peak, or no minimum after the first peak, is found
    """
    left_values = data_frame[parameters.left_guard_column]
    right_values = data_frame[parameters.right_guard_column]
    # Series.append was removed in pandas 2.0, concat is the replacement
    guard_values = pandas.concat(
        [left_values, right_values], ignore_index=True
    )

    guard_data = numpy.histogram(
        guard_values, bins=parameters.guard_histogram_bins
    )
    # float64 keeps the counts exact; float16 can only represent integers
    # up to 2048 exactly and overflows above 65504
    guard_counts = guard_data[0].astype(numpy.float64)
    guard_edges = guard_data[1][1:]  # edges enclose the counts

    pyplot.clf()
    seaborn.lineplot(x=guard_edges, y=guard_counts)
    pyplot.title("Histogram of Guard Avarages (not filtered)")
    pyplot.xlabel("Average Intensity [au]")
    pyplot.ylabel("Number of Observations [1]")
    path = parameters.data_dir / IMAGE_NAMES[1]
    pyplot.savefig(str(path))

    guard_counts_filtered = savgol_filter(
        guard_counts,
        parameters.guard_filter_window,
        parameters.guard_filter_polynom,
    )

    pyplot.clf()
    seaborn.lineplot(x=guard_edges, y=guard_counts_filtered)
    pyplot.title("Histogram of Guard Avarages (Savitzky-Golay filter)")
    pyplot.xlabel("Average Intensity [au]")
    pyplot.ylabel("Number of Observations [1]")
    path = parameters.data_dir / IMAGE_NAMES[2]
    pyplot.savefig(str(path))

    # Finding the first minima after the first peak
    # In a first step, the location of the first peak is determined.
    # Since the algorithm only finds peaks, we need to inverse the data points
    # and start after the first peak to find the first minima.
    indexes = peakutils.indexes(guard_counts_filtered)
    first_peak_position = indexes[0]

    # element wise subtraction: mirrors the curve at its maximum, so the
    # minima become peaks
    inverted_series = max(guard_counts_filtered) - guard_counts_filtered
    indexes = peakutils.indexes(
        inverted_series[first_peak_position:],
        min_dist=parameters.guards_minima_min_dist,
    )
    # since we shortened the data, we need to add the first peak position
    first_minima_position = indexes[0] + first_peak_position
    parameters.guard_max_value = guard_edges[first_minima_position]

    pyplot.clf()
    peakplot_iloc(
        pandas.Series(guard_edges),
        pandas.Series(guard_counts_filtered),
        [first_minima_position],
        "minima",
    )
    pyplot.title(
        (
            f"Histogram of Guard Avarages (Savitzky-Golay filter),"
            f" first minima at {int(parameters.guard_max_value)} au"
        )
    )
    pyplot.xlabel("Average Intensity [au]")
    pyplot.ylabel("Number of Observations [1]")
    path = parameters.data_dir / IMAGE_NAMES[3]
    pyplot.savefig(str(path))
def check_guards(data_frame, parameters):
    """
    Flag the rows whose guard values are below the guard threshold.

    For the left and the right guard a boolean ``"<guard name>.ok"`` column
    is added, telling if the guard statistic is below
    ``parameters.guard_max_value``.  The column "guards.ok" is true where
    both guards are ok.  Two scatter plots (``IMAGE_NAMES`` entries 4 and 5)
    are saved to ``parameters.data_dir``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain "frame", the column named by ``parameters.roi_column``
        and the ``"<guard name>.<guard_stats>"`` columns.  Modified in place.
    parameters : object
        Attributes read: ``left_guard_name``, ``right_guard_name``,
        ``guard_stats``, ``guard_max_value``, ``roi_column`` and
        ``data_dir``.  The attribute ``charts_y_limit`` is set to the y-axis
        limits of the first chart, so later charts can share the scale.

    Returns
    -------
    pandas.DataFrame
        The data frame that was passed in, with the new columns
    """
    for what in (parameters.left_guard_name, parameters.right_guard_name):
        # a comparison with NaN evaluates to False, such rows are not ok
        data_frame[f"{what}.ok"] = (
            data_frame[f"{what}.{parameters.guard_stats}"]
            < parameters.guard_max_value
        )

    data_frame["guards.ok"] = (
        data_frame[f"{parameters.left_guard_name}.ok"]
        & data_frame[f"{parameters.right_guard_name}.ok"]
    )
    guarded_df = data_frame[data_frame["guards.ok"]].copy()

    pyplot.clf()
    ax = seaborn.scatterplot(
        x="frame",
        y=parameters.roi_column,
        data=data_frame,
        hue="guards.ok",
        hue_order=[True, False],
        palette={True: "b", False: "r"},
    )
    pyplot.title("Selection based on guard values")
    # the frame number is plotted on the x-axis; formerly ylabel was called
    # twice, which mislabelled the y-axis and left the x-axis without label
    pyplot.xlabel("Frame Number [1]")
    pyplot.ylabel("Average Intensity [au]")
    path = parameters.data_dir / IMAGE_NAMES[4]
    pyplot.savefig(str(path))
    parameters.charts_y_limit = ax.get_ylim()

    pyplot.clf()
    ax = seaborn.scatterplot(
        x="frame", y=parameters.roi_column, data=guarded_df
    )
    count_all_images = len(data_frame)
    count_guarded_images = len(guarded_df)
    pyplot.title(
        (
            f"Selection, based on guard values"
            f" ({count_guarded_images} of {count_all_images})"
        )
    )
    pyplot.xlabel("Frame Number [1]")
    pyplot.ylabel("Average Intensity [au]")
    ax.set_ylim(parameters.charts_y_limit)
    path = parameters.data_dir / IMAGE_NAMES[5]
    pyplot.savefig(str(path))

    return data_frame
def find_outliers(data_frame, parameters):
    """
    Flag the rows whose ROI value is below the upper outlier limit.

    The limit is the upper quartile plus 1.5 times the inter quartile range
    of the ROI values of all rows with "guards.ok" set.  It is stored as
    ``parameters.outlier_upper_limit``.  A box plot of these values
    (``IMAGE_NAMES`` entry 6) is saved to ``parameters.data_dir``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain "guards.ok" and the column named by
        ``parameters.roi_column``.  The column "outlier.ok" is added in place.
    parameters : object
        Attributes read: ``roi_column`` and ``data_dir``

    Returns
    -------
    pandas.DataFrame
        The data frame that was passed in, with the "outlier.ok" column
    """
    roi_column = parameters.roi_column
    guarded_df = data_frame[data_frame["guards.ok"].eq(True)].copy()

    pyplot.clf()
    seaborn.boxplot(data=guarded_df, x=roi_column)
    pyplot.title("Boxblot of guarded values")
    pyplot.xlabel("Average Intensity [au]")
    pyplot.savefig(str(parameters.data_dir / IMAGE_NAMES[6]))

    guarded_values = guarded_df[roi_column]
    first_quartile = guarded_values.quantile(0.25)
    third_quartile = guarded_values.quantile(0.75)
    # only an upper limit is applied, low values are never flagged
    parameters.outlier_upper_limit = third_quartile + 1.5 * (
        third_quartile - first_quartile
    )

    data_frame["outlier.ok"] = (
        data_frame[roi_column] < parameters.outlier_upper_limit
    )
    return data_frame
def select_on_guards_and_outliers(data_frame, parameters):
    """
    Select the rows that passed the guard check and the outlier check.

    The column "outlier_guards.ok" is added to ``data_frame`` in place; it
    is true where "guards.ok" and "outlier.ok" are both true.  A scatter
    plot of the selected rows (``IMAGE_NAMES`` entry 7) is saved to
    ``parameters.data_dir``, using ``parameters.charts_y_limit`` as y-axis
    limits.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain "frame", "guards.ok", "outlier.ok" and the column named
        by ``parameters.roi_column``
    parameters : object
        Attributes read: ``roi_column``, ``charts_y_limit`` and ``data_dir``

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows
    """
    combined_column = "outlier_guards.ok"
    data_frame[combined_column] = (
        data_frame["guards.ok"] & data_frame["outlier.ok"]
    )
    selected_df = data_frame[data_frame[combined_column].eq(True)].copy()

    pyplot.clf()
    axes = seaborn.scatterplot(
        x="frame", y=parameters.roi_column, data=selected_df
    )
    pyplot.title("Selected Images, outliers removed")
    pyplot.xlabel("Frame Number [1]")
    pyplot.ylabel("Average Intensity [au]")
    axes.set_ylim(parameters.charts_y_limit)
    pyplot.savefig(str(parameters.data_dir / IMAGE_NAMES[7]))

    return selected_df
def smooth_rolling_min(selected_df, parameters):
    """
    Add a rolling minimum of the ROI values to the selected rows.

    The result is stored in the column ``"<roi_name>.rolling.min"`` of
    ``selected_df`` (modified in place).  A scatter plot of the new column
    (``IMAGE_NAMES`` entry 8) is saved to ``parameters.data_dir``.

    Parameters
    ----------
    selected_df : pandas.DataFrame
        Must contain "frame" and the column named by
        ``parameters.roi_column``
    parameters : object
        Attributes read: ``roi_column``, ``roi_name``,
        ``rolling_min_window``, ``charts_y_limit`` and ``data_dir``

    Returns
    -------
    pandas.DataFrame
        The data frame that was passed in, with the new column
    """
    rolling_min_column = f"{parameters.roi_name}.rolling.min"
    rm = (
        selected_df[parameters.roi_column]
        .rolling(parameters.rolling_min_window)
        .min()
    )
    # after a rolling window calculation, the first values will be NaN,
    # we need to fill them with the first valid value that follows.
    # fillna(method="backfill") is deprecated since pandas 2.1, bfill() is
    # the equivalent call.
    selected_df[rolling_min_column] = rm.bfill()

    pyplot.clf()
    ax = seaborn.scatterplot(
        x="frame", y=rolling_min_column, data=selected_df
    )
    pyplot.title("Selected Images, outliers removed, rolling min applied")
    pyplot.xlabel("Frame Number [1]")
    pyplot.ylabel("Average Intensity [au]")
    ax.set_ylim(parameters.charts_y_limit)
    path = parameters.data_dir / IMAGE_NAMES[8]
    pyplot.savefig(str(path))

    return selected_df
def smooth_savgol_filter(selected_df, parameters):
    """
    Smooth the rolling minimum values with a Savitzky-Golay filter.

    The filtered values are stored in the column ``"<roi_name>.savgol"`` of
    ``selected_df`` (modified in place).  A line plot of the new column
    (``IMAGE_NAMES`` entry 9) is saved to ``parameters.data_dir``.

    Parameters
    ----------
    selected_df : pandas.DataFrame
        Must contain "frame" and the column ``"<roi_name>.rolling.min"``
    parameters : object
        Attributes read: ``roi_name``, ``savgol_filter_window``,
        ``savgol_filter_polynom`` and ``data_dir``

    Returns
    -------
    pandas.DataFrame
        The data frame that was passed in, with the new column
    """
    source_column = f"{parameters.roi_name}.rolling.min"
    target_column = f"{parameters.roi_name}.savgol"

    selected_df[target_column] = savgol_filter(
        selected_df[source_column],
        parameters.savgol_filter_window,
        parameters.savgol_filter_polynom,
    )

    pyplot.clf()
    seaborn.lineplot(x="frame", y=target_column, data=selected_df)
    pyplot.title(
        "Selected Images, outliers removed,"
        " rolling min applied, Savitzky-Golay filtered"
    )
    pyplot.xlabel("Frame Number [1]")
    pyplot.ylabel("Average Intensity [au]")
    pyplot.savefig(str(parameters.data_dir / IMAGE_NAMES[9]))

    return selected_df
def find_extremas(selected_df, parameters):
    """
    Locate the maxima and minima of the Savitzky-Golay filtered curve.

    A chart of the curve with the marked positions (``IMAGE_NAMES`` entry
    10) is saved to ``parameters.data_dir``.

    Parameters
    ----------
    selected_df : pandas.DataFrame
        Must contain "frame" and the column ``"<roi_name>.savgol"``
    parameters : object
        Attributes read: ``roi_name``, ``peak_threshold``,
        ``peak_min_distance`` and ``data_dir``

    Returns
    -------
    pandas.DataFrame
        The rows of ``selected_df`` at the found positions, sorted by index,
        with the additional boolean column "is_maxima"
    """
    smoothed = selected_df[f"{parameters.roi_name}.savgol"]

    max_indexes = peakutils.indexes(
        smoothed,
        thres=parameters.peak_threshold,
        min_dist=parameters.peak_min_distance,
    )
    maximas = selected_df.iloc[max_indexes].assign(is_maxima=True)

    # the peak detection is run on the curve mirrored at its maximum, so
    # the minima of the original curve show up as peaks
    mirrored = max(smoothed) - smoothed
    min_indexes = peakutils.indexes(
        mirrored, min_dist=parameters.peak_min_distance
    )
    minimas = selected_df.iloc[min_indexes].assign(is_maxima=False)

    pyplot.clf()
    peakplot_iloc(
        selected_df["frame"],
        smoothed,
        max_indexes,
        ix1_label="maxima",
        ix2=min_indexes,
        ix2_label="minima",
    )
    pyplot.title("Finding Minimas")
    pyplot.xlabel("Frame Number [1]")
    pyplot.ylabel("Average Intensity [au]")
    pyplot.savefig(str(parameters.data_dir / IMAGE_NAMES[10]))

    return pandas.concat([maximas, minimas]).sort_index()
def save_data(data_frame, selected_df, extremas_df, parameters):
    """
    Write all numeric results to ``numeric-data.xlsx`` in the data directory.

    The workbook gets the sheets "extremas", "selected data", "raw data" and
    "parameters".  The parameters sheet lists every parameter except
    "tif_list" and "cuts_dir", with a "Descriptions" column.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Written to the sheet "raw data"
    selected_df : pandas.DataFrame
        Written to the sheet "selected data"
    extremas_df : pandas.DataFrame
        Written to the sheet "extremas"
    parameters : object
        Provides ``data_dir``, ``items()`` and ``get_descriptions()``.
        NOTE(review): ``items()`` presumably yields (name, value) pairs and
        ``get_descriptions()`` a mapping of name to text - verify against
        the parameters class.
    """
    path = parameters.data_dir / "numeric-data.xlsx"

    ignore_parameters = {"tif_list", "cuts_dir"}
    tmp_parameters = {
        k: [v] for k, v in parameters.items() if k not in ignore_parameters
    }
    # transposed: one row per parameter, the value in a single column
    tmp_setings_df = pandas.DataFrame(tmp_parameters).T
    all_descriptions = parameters.get_descriptions()
    matched_descritions = {
        key: value
        for key, value in all_descriptions.items()
        if key in tmp_parameters
    }
    # the Series is aligned on the parameter names in the index
    tmp_setings_df["Descriptions"] = pandas.Series(matched_descritions)

    # the context manager saves and closes the workbook, even if writing one
    # of the sheets fails; without it the file is never finalised
    with pandas.ExcelWriter(path, engine="xlsxwriter") as writer:
        extremas_df.to_excel(writer, sheet_name="extremas")
        selected_df.to_excel(writer, sheet_name="selected data")
        data_frame.to_excel(writer, sheet_name="raw data")
        tmp_setings_df.to_excel(writer, sheet_name="parameters")
def create_report(data_frame, selected_df, extremas_df, parameters):
    """
    Create the PDF report ``report.pdf`` in the data directory.

    The report embeds the charts listed in ``IMAGE_NAMES``, which must
    already exist in ``parameters.data_dir``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        All analysed images; only its length is used
    selected_df : pandas.DataFrame
        The selected images; only its length is used
    extremas_df : pandas.DataFrame
        Not used by this function, kept for a stable interface
    parameters : object
        Attributes read: ``data_dir`` and ``guard_max_value``
    """
    styles = getSampleStyleSheet()
    style_headline = styles["Heading1"]
    style_section = styles["Heading2"]
    style_text = styles["Normal"]

    data_dir = parameters.data_dir
    path = data_dir / "report.pdf"
    doc = SimpleDocTemplate(str(path), pagesize=A4)

    # the charts are created with a 12:9 figure size, the height is derived
    # from the width to keep that ratio
    img_width = doc.width * 0.9
    img_height = (900 * img_width / 1200) * 0.9

    num_images = len(data_frame)
    num_selected = len(selected_df)
    num_discarded = num_images - num_selected

    def text_and_graph(image_nr, text):
        """Return a paragraph and its chart, kept together on one page."""
        flowable = KeepTogether(
            [
                Paragraph(text, style_text),
                Image(
                    str(data_dir / IMAGE_NAMES[image_nr]),
                    width=img_width,
                    height=img_height,
                ),
                Spacer(1, 7 * mm),
            ]
        )
        return flowable

    story = [
        Paragraph(f"Analysis of {num_images} Tif Images", style_headline),
        Spacer(1, 10 * mm),
        Paragraph("Estimating Guard Threshold", style_section),
        text_and_graph(
            1,
            (
                "In a first step, the histogram of the combined left and "
                "right guard values is calculated."
            ),
        ),
        text_and_graph(
            2,
            (
                "A Savitzky-Golay filter is applied to the histogram to "
                "smooth the curve."
            ),
        ),
        text_and_graph(
            3,
            (
                "The first minima after the first peak is used as the guard "
                f"threshold value: {int(parameters.guard_max_value)} au"
            ),
        ),
        text_and_graph(
            4,
            (
                "The images with one of the guard values above the threshold "
                "are discarded."
            ),
        ),
        Image(
            str(data_dir / IMAGE_NAMES[5]), width=img_width, height=img_height
        ),
        PageBreak(),
        Paragraph("Removing Outliers", style_section),
        text_and_graph(6, "From the remaining values, outliers are removed."),
        text_and_graph(
            7,
            (
                f"From {num_images} images {num_discarded} images were "
                f"discarded, leaving {num_selected} selected. The finally "
                # sheet name as written by save_data
                "selected values are listed in the excel sheet 'selected data' "
                "in the data file."
            ),
        ),
        PageBreak(),
        Paragraph(
            "Experimental: Applying a rolling min calculation", style_section
        ),
        text_and_graph(
            8,
            (
                "Due to the nature of the experiment, unusable images tend "
                "to have a higher value as the desiered ones. Therfore a "
                "rolling min filter is applied"
            ),
        ),
        Paragraph("Experimental: Finding Maxima and Minima", style_section),
        text_and_graph(
            9,
            "To smooth the resulting curve, a Savitzky-Golay filter is used.",
        ),
        text_and_graph(
            10,
            (
                "The most interesting data points should be the maxima and "
                "minima of this curve. These are listed in the sheet "
                "'extremas' in the data file"
            ),
        ),
    ]

    # render the collected flowables; without this call no PDF is written
    doc.build(story)
def save_temp(data_frame, parameters):
    """
    Store the intermediate results next to the analysed images.

    Two files are written to ``parameters.tif_dir``: the data frame as tab
    separated ``_data.csv`` and the pickled parameters object as
    ``_parameters.pickle``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        The data to store
    parameters : object
        Must be picklable and provide ``tif_dir``
    """
    target_dir = parameters.tif_dir

    data_frame.to_csv(target_dir / "_data.csv", sep="\t")

    with open(target_dir / "_parameters.pickle", "wb") as pickle_file:
        pickle.dump(parameters, pickle_file)
def load_temp(folder):
    """
    Load the intermediate results written by ``save_temp``.

    Parameters
    ----------
    folder : str or path-like
        Directory containing ``_data.csv`` and ``_parameters.pickle``

    Returns
    -------
    tuple
        The data frame read from the tab separated csv file (first column
        used as index) and the unpickled parameters object
    """
    base_dir = pathlib.Path(folder)

    data_frame = pandas.read_csv(
        base_dir / "_data.csv", sep="\t", index_col=0
    )

    # NOTE: unpickling can execute arbitrary code, only load folders that
    # were written by this program
    with (base_dir / "_parameters.pickle").open("rb") as pickle_file:
        parameters = pickle.load(pickle_file)

    return data_frame, parameters