Image analysis for the mTor project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

403 lines
12 KiB

import numpy
import pandas
import pathlib
import peakutils
import pickle
import seaborn
import matplotlib.pyplot as pyplot
from scipy.signal import savgol_filter
from .commons import ROI_STATISTIC_FUNCTIONS
def peakplot_iloc(x, y, ix1, ix1_label="peaks", ix2=None, ix2_label="peaks"):
"""
Plots the original data with the peaks that were identified
The original peakutils.plot function imported as "peakplot" has a little
bug, when displaying dataframes with some values removed. The
implementation below fixes this.
Parameters
----------
x : array-like
Data on the x-axis
y : array-like
Data on the y-axis
ind : array-like
Indexes of the identified peaks
"""
pyplot.plot(x, y)
num_items = len(ix1)
pyplot.plot(
x.iloc[ix1],
y.iloc[ix1],
"r+",
ms=5,
mew=2,
label=f"{num_items} {ix1_label}",
)
if ix2 is not None:
num_items = len(ix2)
pyplot.plot(
x.iloc[ix2],
y.iloc[ix2],
"g1",
ms=5,
mew=2,
label=f"{num_items} {ix2_label}",
)
pyplot.legend()
def set_plotting_styles():
seaborn.set_style("darkgrid")
seaborn.set_style(
"ticks",
{
"legend.frameon": True,
"xtick.direction": "in",
"ytick.direction": "in",
"axes.linewidth": 2,
},
)
seaborn.set(rc={"figure.figsize": (12, 9)})
seaborn.set_context("talk")
def init_pandas_data_frame(parameters):
columns = ["file", "frame"]
groups = (
parameters.roi_name,
parameters.left_guard_name,
parameters.right_guard_name,
)
for group in groups:
for func_name in ROI_STATISTIC_FUNCTIONS:
columns.append(f"{group}.{func_name}")
return pandas.DataFrame(columns=columns)
def construct_data_frame(stats_results, parameters):
data_frame = init_pandas_data_frame(parameters)
data_frame = data_frame.append(stats_results, ignore_index=True)
return data_frame.sort_values(by=["frame"]).reset_index(drop=True)
def find_guard_threshold(data_frame, parameters):
left_values = data_frame[parameters.left_guard_column]
right_values = data_frame[parameters.right_guard_column]
guard_values = left_values.append(right_values, ignore_index=True)
guard_data = numpy.histogram(
guard_values, bins=parameters.guard_histogram_bins
)
guard_counts = guard_data[0].astype(numpy.float16)
guard_edges = guard_data[1][1:] # edges enclose the counts
pyplot.clf()
seaborn.lineplot(x=guard_edges, y=guard_counts)
pyplot.title("Histogram of Guard Avarages (not filtered)")
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = (
parameters.data_dir / "1-histogram-of-guard-avarages-not-filtered.png"
)
pyplot.savefig(str(path))
guard_counts_filtered = savgol_filter(
guard_counts,
parameters.guard_filter_window,
parameters.guard_filter_polynom,
)
pyplot.clf()
seaborn.lineplot(x=guard_edges, y=guard_counts_filtered)
pyplot.title("Histogram of Guard Avarages (Savitzky-Golay filter)")
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = parameters.data_dir / "2-histogram-of-guard-avarages-filtered.png"
pyplot.savefig(str(path))
# Finding the first minima after the first peak
# In a first step, the location of the first peak is determined.
# Since the algorithm only finds peaks, we need to inverse the data points
# and start after the first peak to find the first minima.
indexes = peakutils.indexes(guard_counts_filtered)
first_peak_position = indexes[0]
# looks like magic, but is standard numpy behaviour
inverted_series = max(guard_counts_filtered) - guard_counts_filtered
indexes = peakutils.indexes(
inverted_series[first_peak_position:],
min_dist=parameters.guards_minima_min_dist,
)
# since we shortened the data, we need to add the first peak position
first_minima_position = indexes[0] + first_peak_position
parameters.guard_max_value = guard_edges[first_minima_position]
pyplot.clf()
peakplot_iloc(
pandas.Series(guard_edges), pandas.Series(guard_counts_filtered), [first_minima_position], "minima"
)
pyplot.title(
(
f"Histogram of Guard Avarages (Savitzky-Golay filter),"
f" first minima at {int(parameters.guard_max_value)} au"
)
)
pyplot.xlabel("Average Intensity [au]")
pyplot.ylabel("Number of Observations [1]")
path = (
parameters.data_dir
/ "3-histogram-of-guard-avarages-filtered-with-first-minima.png"
)
pyplot.savefig(str(path))
def check_guards(data_frame, parameters):
for what in (parameters.left_guard_name, parameters.right_guard_name):
ok_col = f"{what}.ok"
data_frame[ok_col] = False
mask = (
data_frame[f"{what}.{parameters.guard_stats}"]
< parameters.guard_max_value
)
data_frame.loc[mask, ok_col] = True
data_frame["guards.ok"] = (
data_frame[f"{parameters.left_guard_name}.ok"]
& data_frame[f"{parameters.right_guard_name}.ok"]
)
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame",
y=parameters.roi_column,
data=data_frame,
hue="guards.ok",
hue_order=[True, False],
palette={True: "b", False: "r"},
)
pyplot.title("Selection based on guard values")
pyplot.ylabel("Average Intensity [au]")
pyplot.ylabel("Frame Number [1]")
path = parameters.data_dir / "4-image-selection-based-on-guard-values.png"
pyplot.savefig(str(path))
parameters.charts_y_limit = ax.get_ylim()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=parameters.roi_column, data=guarded_df
)
count_all_images = len(data_frame)
count_guarded_images = len(guarded_df)
pyplot.title(
(
f"Selection, based on guard values"
f" ({count_guarded_images} of {count_all_images})"
)
)
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / "5-selected-values-based-on-guard-values.png"
pyplot.savefig(str(path))
return data_frame
def find_outliers(data_frame, parameters):
mask = data_frame["guards.ok"] == True # noqa: E712
guarded_df = data_frame[mask].copy()
pyplot.clf()
seaborn.boxplot(data=guarded_df, x=parameters.roi_column)
pyplot.title(f"Boxblot of guarded values")
pyplot.xlabel("Average Intensity [au]")
path = parameters.data_dir / "6-boxplot-of-guarded-values.png"
pyplot.savefig(str(path))
lower_quartil = guarded_df[parameters.roi_column].quantile(0.25)
upper_quartil = guarded_df[parameters.roi_column].quantile(0.75)
inter_quartil_range = upper_quartil - lower_quartil
parameters.outlier_upper_limit = upper_quartil + 1.5 * inter_quartil_range
data_frame["outlier.ok"] = (
data_frame[parameters.roi_column] < parameters.outlier_upper_limit
)
return data_frame
def select_on_guards_and_outliers(data_frame, parameters):
data_frame["outlier_guards.ok"] = (
data_frame["guards.ok"] & data_frame["outlier.ok"]
)
mask = data_frame["outlier_guards.ok"] == True # noqa: E712
selected_df = data_frame[mask].copy()
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=parameters.roi_column, data=selected_df
)
pyplot.title(f"Selected Images, outliers removed")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = parameters.data_dir / "7-selected-images-outliers-removed.png"
pyplot.savefig(str(path))
return selected_df
def smooth_rolling_min(selected_df, parameters):
rm = (
selected_df[parameters.roi_column]
.rolling(parameters.rolling_min_window)
.min()
)
# after a rolling window calculation, the first values will be NaN,
# we need to fill them
selected_df[f"{parameters.roi_name}.rolling.min"] = rm.fillna(
method="backfill"
)
pyplot.clf()
ax = seaborn.scatterplot(
x="frame", y=f"{parameters.roi_name}.rolling.min", data=selected_df
)
pyplot.title(f"Selected Images, outliers removed, rolling min applied")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
ax.set_ylim(parameters.charts_y_limit)
path = (
parameters.data_dir
/ "8-selected-images-outliers-removed-rolling-min-applied.png"
)
pyplot.savefig(str(path))
return selected_df
def smooth_savgol_filter(selected_df, parameters):
filtered = savgol_filter(
selected_df[f"{parameters.roi_name}.rolling.min"],
parameters.savgol_filter_window,
parameters.savgol_filter_polynom,
)
selected_df[f"{parameters.roi_name}.savgol"] = filtered
pyplot.clf()
seaborn.lineplot(
x="frame", y=f"{parameters.roi_name}.savgol", data=selected_df
)
pyplot.title(
(
f"Selected Images, outliers removed,"
f" rolling min applied, Savitzky-Golay filtered"
)
)
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = (
parameters.data_dir
/ "9-selected-images-outliers-removed-rolling-min-savgol-filtered.png"
)
pyplot.savefig(str(path))
return selected_df
def find_extremas(selected_df, parameters):
max_indexes = peakutils.indexes(
selected_df[f"{parameters.roi_name}.savgol"],
thres=parameters.peak_threshold,
min_dist=parameters.peak_min_distance,
)
maximas = selected_df.iloc[max_indexes].copy()
pyplot.clf()
peakplot_iloc(
selected_df["frame"],
selected_df[f"{parameters.roi_name}.savgol"],
max_indexes,
"maxima",
)
pyplot.title(f"Finding Maximas")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = parameters.data_dir / "10-finding-maximas.png"
pyplot.savefig(str(path))
inverted_series = (
max(selected_df[f"{parameters.roi_name}.savgol"])
- selected_df[f"{parameters.roi_name}.savgol"]
)
min_indexes = peakutils.indexes(
inverted_series, min_dist=parameters.peak_min_distance
)
minimas = selected_df.iloc[min_indexes].copy()
pyplot.clf()
peakplot_iloc(
selected_df["frame"],
selected_df[f"{parameters.roi_name}.savgol"],
max_indexes,
"maxima",
min_indexes,
"minima",
)
pyplot.title(f"Finding Minimas")
pyplot.xlabel("Frame Number [1]")
pyplot.ylabel("Average Intensity [au]")
path = parameters.data_dir / "11-finding-minimas.png"
pyplot.savefig(str(path))
maximas["is_maxima"] = True
minimas["is_maxima"] = False
extremas_df = pandas.concat([maximas, minimas]).sort_index()
return extremas_df
def save_data(data_frame, selected_df, extremas_df, parameters):
path = parameters.data_dir / "numeric-data.xlsx"
writer = pandas.ExcelWriter(path, engine="xlsxwriter")
extremas_df.to_excel(writer, sheet_name="extremas")
selected_df.to_excel(writer, sheet_name="selected data")
data_frame.to_excel(writer, sheet_name="raw data")
ignore_parameters = {"tif_list", "cuts_dir"}
tmp_parameters = {
k: [v] for k, v in parameters.items() if k not in ignore_parameters
}
tmp_setings_df = pandas.DataFrame(tmp_parameters).T
tmp_setings_df.to_excel(writer, sheet_name="parameters")
writer.save()
def save_temp(data_frame, parameters):
csv_path = parameters.tif_dir / "_data.csv"
data_frame.to_csv(csv_path, sep="\t")
parameters_path = parameters.tif_dir / "_parameters.pickle"
with open(parameters_path, "wb") as fh:
pickle.dump(parameters, fh)
def load_temp(folder):
dir_path = pathlib.Path(folder)
csv_path = dir_path / "_data.csv"
data_frame = pandas.read_csv(csv_path, sep="\t", index_col=0)
parameters_path = dir_path / "_parameters.pickle"
with open(parameters_path, "rb") as fh:
parameters = pickle.load(fh)
return data_frame, parameters