Image analysis for the mTor project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

521 lines
16 KiB

import numpy
import pandas
import pathlib
import peakutils
import pickle
import seaborn
import matplotlib.pyplot as pyplot
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
PageBreak,
Spacer,
KeepTogether,
Image,
)
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import mm
from scipy.signal import savgol_filter
from .commons import ROI_STATISTIC_FUNCTIONS
IMAGE_NAMES = {
1: "1-histogram-of-guard-avarages-not-filtered.png",
2: "2-histogram-of-guard-avarages-filtered.png",
3: "3-histogram-of-guard-avarages-filtered-with-first-minima.png",
4: "4-image-selection-based-on-guard-values.png",
5: "5-selected-values-based-on-guard-values.png",
6: "6-boxplot-of-guarded-values.png",
7: "7-selected-images-outliers-removed.png",
8: "8-selected-images-outliers-removed-rolling-min-applied.png",
9: "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png",
10: "11-finding-minima-and-maxima.png",
}
def peakplot_iloc(x, y, ix1, ix1_label="peaks", ix2=None, ix2_label="peaks"):
"""
Plots the original data with the peaks that were identified
The original peakutils.plot function imported as "peakplot" has a little
bug, when displaying dataframes with some values removed. The
implementation below fixes this.
Parameters
----------
x : array-like
Data on the x-axis
y : array-like
Data on the y-axis
ind : array-like
Indexes of the identified peaks
"""
pyplot.plot(x, y)
num_items = len(ix1)
pyplot.plot(
x.iloc[ix1],
y.iloc[ix1],
"r+",
ms=5,
mew=2,
label=f"{num_items} {ix1_label}",
)
if ix2 is not None:
num_items = len(ix2)
pyplot.plot(
x.iloc[ix2],
y.iloc[ix2],
"g1",
ms=5,
mew=2,
label=f"{num_items} {ix2_label}",
)
pyplot.legend()
def set_plotting_styles():
seaborn.set_style("darkgrid")
seaborn.set_style(
"ticks",
{
"legend.frameon": True,
"xtick.direction": "in",
"ytick.direction": "in",
"axes.linewidth": 2,
},
)
seaborn.set(rc={"figure.figsize": (12, 9)})
seaborn.set_context("talk")
def init_pandas_data_frame(parameters):
columns = ["file", "frame"]
groups = (
parameters.roi_name,
parameters.left_guard_name,
parameters.right_guard_name,
)
for group in groups:
for func_name in ROI_STATISTIC_FUNCTIONS:
columns.append(f"{group}.{func_name}")
return pandas.DataFrame(columns=columns)
def construct_data_frame(stats_results, parameters):
data_frame = init_pandas_data_frame(parameters)
data_frame = data_frame.append(stats_results, ignore_index=True)
return data_frame.sort_values(by=["frame"]).reset_index(drop=True)
def find_guard_threshold(data_frame, parameters):
left_values = data_frame[parameters.left_guard_column]
right_values = data_frame[parameters.right_guard_column]
guard_values = left_values.append(right_values, ignore_index=True)
guard_data = numpy.histogram(
guard_values, bins=parameters.guard_histogram_bins
)
guard_counts = guard_data[0].astype(numpy.float16)
guard_edges = guard_data[1][1:] # edges enclose the counts
pyplot.clf()
seaborn.lineplot(x=guard_edges, y=guard_counts)
pyplot.title("Histogram of Guard Avarages (not filtered)")
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = parameters.data_dir / IMAGE_NAMES[1]
pyplot.savefig(str(path))
guard_counts_filtered = savgol_filter(
guard_counts,
parameters.guard_filter_window,
parameters.guard_filter_polynom,
)
pyplot.clf()
seaborn.lineplot(x=guard_edges, y=guard_counts_filtered)
pyplot.title("Histogram of Guard Avarages (Savitzky-Golay filter)")
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = parameters.data_dir / IMAGE_NAMES[2]
pyplot.savefig(str(path))
# Finding the first minima after the first peak
# In a first step, the location of the first peak is determined.
# Since the algorithm only finds peaks, we need to inverse the data points
# and start after the first peak to find the first minima.
indexes = peakutils.indexes(guard_counts_filtered)
first_peak_position = indexes[0]
# looks like magic, but is standard numpy behaviour
inverted_series = max(guard_counts_filtered) - guard_counts_filtered
indexes = peakutils.indexes(
inverted_series[first_peak_position:],
min_dist=parameters.guards_minima_min_dist,
)
# since we shortened the data, we need to add the first peak position
first_minima_position = indexes[0] + first_peak_position
parameters.guard_max_value = guard_edges[first_minima_position]
pyplot.clf()
peakplot_iloc(
pandas.Series(guard_edges),
pandas.Series(guard_counts_filtered),
[first_minima_position],
"minima",
)
pyplot.title(
(
f"Histogram of Guard Avarages (Savitzky-Golay filter),"
f" first minima at {int(parameters.guard_max_value)} au"
)
)
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = parameters.data_dir / IMAGE_NAMES[3]
pyplot.savefig(str(path))
def check_guards(data_frame, parameters):
for what in (parameters.left_guard_name, parameters.right_guard_name):
ok_col = f"{what}.ok"
data_frame[ok_col] = False
mask = (
data_frame[f"{what}.{parameters.guard_stats}"]
< parameters.guard_max_value
)
data_frame.loc[mask, ok_col] = True
data_frame["guards.ok"] = (
data_frame[f"{parameters.left_guard_name}.ok"]
& data_frame[f"{parameters.right_guard_name}.ok"]
)
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame",
y=parameters.roi_column,
data=data_frame,
hue="guards.ok",
hue_order=[True, False],
palette={True: "b", False: "r"},
)
pyplot.title("Selection based on guard values")
pyplot.ylabel("Average Intensity [au]")
pyplot.ylabel("Frame Number [1]")
path = parameters.data_dir / IMAGE_NAMES[4]
pyplot.savefig(str(path))
parameters.charts_y_limit = ax.get_ylim()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=parameters.roi_column, data=guarded_df
)
count_all_images = len(data_frame)
count_guarded_images = len(guarded_df)
pyplot.title(
(
f"Selection, based on guard values"
f" ({count_guarded_images} of {count_all_images})"
)
)
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / IMAGE_NAMES[5]
pyplot.savefig(str(path))
return data_frame
def find_outliers(data_frame, parameters):
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
seaborn.boxplot(data=guarded_df, x=parameters.roi_column)
pyplot.title(f"Boxblot of guarded values")
pyplot.xlabel("Average Intensity [au]")
path = parameters.data_dir / IMAGE_NAMES[6]
pyplot.savefig(str(path))
lower_quartil = guarded_df[parameters.roi_column].quantile(0.25)
upper_quartil = guarded_df[parameters.roi_column].quantile(0.75)
inter_quartil_range = upper_quartil - lower_quartil
parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
data_frame["outlier.ok"] = (
data_frame[parameters.roi_column] < parameters.outlier_upper_limit
)
return data_frame
def select_on_guards_and_outliers(data_frame, parameters):
data_frame["outlier_guards.ok"] = (
data_frame["guards.ok"] & data_frame["outlier.ok"]
)
mask = data_frame["outlier_guards.ok"] == True # noqa: E712
selected_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=parameters.roi_column, data=selected_df
)
pyplot.title(f"Selected Images, outliers removed")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / IMAGE_NAMES[7]
pyplot.savefig(str(path))
return selected_df
def smooth_rolling_min(selected_df, parameters):
rm = (
selected_df[parameters.roi_column]
.rolling(parameters.rolling_min_window)
.min()
)
# after a rolling window calculation, the first values will be NaN,
# we need to fill them
selected_df[f"{parameters.roi_name}.rolling.min"] = rm.fillna(
method="backfill"
)
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df
)
pyplot.title(f"Selected Images, outliers removed, rolling min applied")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / IMAGE_NAMES[8]
pyplot.savefig(str(path))
return selected_df
def smooth_savgol_filter(selected_df, parameters):
filtered = savgol_filter(
selected_df[f"{parameters.roi_name}.rolling.min"],
parameters.savgol_filter_window,
parameters.savgol_filter_polynom,
)
selected_df[f"{parameters.roi_name}.savgol"] = filtered
pyplot.clf()
seaborn.lineplot(
x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df
)
pyplot.title(
(
f"Selected Images, outliers removed,"
f" rolling min applied, Savitzky-Golay filtered"
)
)
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = parameters.data_dir / IMAGE_NAMES[9]
pyplot.savefig(str(path))
return selected_df
def find_extremas(selected_df, parameters):
max_indexes = peakutils.indexes(
selected_df[f"{parameters.roi_name}.savgol"],
thres=parameters.peak_threshold,
min_dist=parameters.peak_min_distance,
)
maximas = selected_df.iloc[max_indexes].copy()
inverted_series = (
max(selected_df[f"{parameters.roi_name}.savgol"])
- selected_df[f"{parameters.roi_name}.savgol"]
)
min_indexes = peakutils.indexes(
inverted_series, min_dist=parameters.peak_min_distance
)
minimas = selected_df.iloc[min_indexes].copy()
pyplot.clf()
peakplot_iloc(
selected_df["frame"],
selected_df[f"{parameters.roi_name}.savgol"],
max_indexes,
"maxima",
min_indexes,
"minima",
)
pyplot.title(f"Finding Minimas")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = parameters.data_dir / IMAGE_NAMES[10]
pyplot.savefig(str(path))
maximas["is_maxima"] = True
minimas["is_maxima"] = False
extremas_df = pandas.concat([maximas, minimas]).sort_index()
return extremas_df
def save_data(data_frame, selected_df, extremas_df, parameters):
path = parameters.data_dir / "numeric-data.xlsx"
writer = pandas.ExcelWriter(path, engine="xlsxwriter")
extremas_df.to_excel(writer, sheet_name="extremas")
selected_df.to_excel(writer, sheet_name="selected data")
data_frame.to_excel(writer, sheet_name="raw data")
ignore_parameters = {"tif_list", "cuts_dir"}
tmp_parameters = {
k: [v] for k, v in parameters.items() if k not in ignore_parameters
}
tmp_setings_df = pandas.DataFrame(tmp_parameters).T
all_descriptions = parameters.get_descriptions()
matched_descritions = {
key: value
for key, value in all_descriptions.items()
if key in tmp_parameters
}
tmp_setings_df["Descriptions"] = pandas.Series(matched_descritions)
tmp_setings_df.to_excel(writer, sheet_name="parameters")
writer.save()
def create_report(data_frame, selected_df, extremas_df, parameters):
styles = getSampleStyleSheet()
style_headline = styles["Heading1"]
style_section = styles["Heading2"]
style_text = styles["Normal"]
data_dir = parameters.data_dir
path = data_dir / "report.pdf"
doc = SimpleDocTemplate(str(path), pagesize=A4)
img_width = doc.width * 0.9
img_height = (900 * img_width / 1200) * 0.9
num_images = len(data_frame)
num_selected = len(selected_df)
num_discarded = num_images - num_selected
def text_and_graph(image_nr, text):
flowable = KeepTogether(
[
Paragraph(text, style_text),
Image(
str(data_dir / IMAGE_NAMES[image_nr]),
width=img_width,
height=img_height,
),
Spacer(1, 7 * mm),
]
)
return flowable
story = [
Paragraph(f"Analysis of {num_images} Tif Images", style_headline),
Spacer(1, 10 * mm),
Paragraph("Estimating Guard Threshold", style_section),
text_and_graph(
1,
(
"In a first step, the histogram of the combined left and "
"right guard values is calculated."
),
),
text_and_graph(
2,
(
"A Savitzky-Golay filter is applied to the histogram to "
"smooth the curve."
),
),
text_and_graph(
3,
(
"The first minima after the first peak is used as the guard "
f"threshold value: {int(parameters.guard_max_value)} au"
),
),
text_and_graph(
4,
(
"The images with one of the guard values above the threshold "
"are discarded."
),
),
Image(
str(data_dir / IMAGE_NAMES[5]), width=img_width, height=img_height
),
PageBreak(),
Paragraph("Removing Outliers", style_section),
text_and_graph(6, "From the remaining values, outliers are removed."),
text_and_graph(
7,
(
f"From {num_images} images {num_discarded} images were "
f"discarded, leaving {num_selected} selected. The finally "
"selected values are listed in the excel sheet 'selection' "
"in the data file."
),
),
PageBreak(),
Paragraph(
"Experimental: Applying a rolling min calculation", style_section
),
text_and_graph(
8,
(
"Due to the nature of the experiment, unusable images tend "
"to have a higher value as the desiered ones. Therfore a "
"rolling min filter is applied"
),
),
Paragraph("Experimental: Finding Maxima and Minima", style_section),
text_and_graph(
9,
"To smooth the resulting curve, a Savitzky-Golay filter is used.",
),
text_and_graph(
10,
(
"The most interesting data points should be the maxima and "
"minima of this curve. These are listed in the sheet "
"'extremas' in the data file"
),
),
]
doc.build(story)
def save_temp(data_frame, parameters):
csv_path = parameters.tif_dir / "_data.csv"
data_frame.to_csv(csv_path, sep="\t")
parameters_path = parameters.tif_dir / "_parameters.pickle"
with open(parameters_path, "wb") as fh:
pickle.dump(parameters, fh)
def load_temp(folder):
dir_path = pathlib.Path(folder)
csv_path = dir_path / "_data.csv"
data_frame = pandas.read_csv(csv_path, sep="\t", index_col=0)
parameters_path = dir_path / "_parameters.pickle"
with open(parameters_path, "rb") as fh:
parameters = pickle.load(fh)
return data_frame, parameters