|
|
|
@ -10,14 +10,12 @@ from collections import namedtuple
@@ -10,14 +10,12 @@ from collections import namedtuple
|
|
|
|
|
import pandas |
|
|
|
|
|
|
|
|
|
from .columns import ( |
|
|
|
|
COL_NAME_POS_ID, |
|
|
|
|
COL_NAME_WELL_ROW, |
|
|
|
|
COL_NAME_SPOT_FOUND, |
|
|
|
|
RAW_DATA_POS_ID, |
|
|
|
|
META_DATA_WELL_ROW, |
|
|
|
|
RAW_DATA_COLUMN_SET, |
|
|
|
|
COL_NAME_EXPOSURE_ID, |
|
|
|
|
COL_NAME_WELL_COLUMN, |
|
|
|
|
COL_NAME_SPOT_DIAMETER, |
|
|
|
|
COLUMNS_RENAME_MAP |
|
|
|
|
META_DATA_EXPOSURE_ID, |
|
|
|
|
META_DATA_WELL_COLUMN, |
|
|
|
|
RAW_DATA_COLUMNS_RENAME_MAP, |
|
|
|
|
) |
|
|
|
|
from .parameters import add_optional_measurement_parameters |
|
|
|
|
|
|
|
|
@ -29,12 +27,6 @@ REGEX_WELL = re.compile(
@@ -29,12 +27,6 @@ REGEX_WELL = re.compile(
|
|
|
|
|
re.VERBOSE | re.IGNORECASE, |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# Maps column headers as they appear in the raw csv files to the column
# name constants used by this package. The surrounding spaces in " ID "
# are part of the raw header and must be kept verbatim.
COLUMNS_RENAME_MAP = {
    " ID ": COL_NAME_POS_ID,
    "Found": COL_NAME_SPOT_FOUND,
    "Dia.": COL_NAME_SPOT_DIAMETER,
}
|
|
|
|
|
|
|
|
|
# Lightweight record with the three values that `parse_file` reads from its
# measurement info (`.row`, `.column`, `.exposure`).
FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -71,7 +63,7 @@ def _extract_measurement_info(data_file):
@@ -71,7 +63,7 @@ def _extract_measurement_info(data_file):
|
|
|
|
|
|
|
|
|
|
def _cleanup_data_columns(data_frame):
    """Rename raw data columns for consistency and drop unused columns.

    The columns are renamed according to ``RAW_DATA_COLUMNS_RENAME_MAP``;
    afterwards every column whose (renamed) name is not contained in
    ``RAW_DATA_COLUMN_SET`` is removed.

    :param data_frame: pandas data frame to clean up, it is not modified
    :return: a new data frame with renamed and reduced columns
    """
    # Only one rename is needed: a previous rename of the same input was
    # overwritten by this one and therefore never had any effect.
    renamed = data_frame.rename(columns=RAW_DATA_COLUMNS_RENAME_MAP)
    surplus_columns = set(renamed.columns) - RAW_DATA_COLUMN_SET
    # hand over a list, pandas documents list-like labels for `drop`
    return renamed.drop(columns=list(surplus_columns))
|
|
|
|
|
|
|
|
@ -86,9 +78,9 @@ def parse_file(data_file, silent=False):
@@ -86,9 +78,9 @@ def parse_file(data_file, silent=False):
|
|
|
|
|
else: |
|
|
|
|
raise e |
|
|
|
|
data_frame = _parse_csv(data_file) |
|
|
|
|
data_frame[COL_NAME_WELL_ROW] = measurement_info.row |
|
|
|
|
data_frame[COL_NAME_WELL_COLUMN] = measurement_info.column |
|
|
|
|
data_frame[COL_NAME_EXPOSURE_ID] = measurement_info.exposure |
|
|
|
|
data_frame[META_DATA_WELL_ROW] = measurement_info.row |
|
|
|
|
data_frame[META_DATA_WELL_COLUMN] = measurement_info.column |
|
|
|
|
data_frame[META_DATA_EXPOSURE_ID] = measurement_info.exposure |
|
|
|
|
return _cleanup_data_columns(data_frame) |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -101,7 +93,7 @@ def parse_multiple_files(file_list):
@@ -101,7 +93,7 @@ def parse_multiple_files(file_list):
|
|
|
|
|
data_frame = next(filtered) |
|
|
|
|
for next_frame in filtered: |
|
|
|
|
data_frame = data_frame.append(next_frame, ignore_index=True) |
|
|
|
|
data_frame[COL_NAME_WELL_ROW] = data_frame[COL_NAME_WELL_ROW].astype( |
|
|
|
|
data_frame[META_DATA_WELL_ROW] = data_frame[META_DATA_WELL_ROW].astype( |
|
|
|
|
"category" |
|
|
|
|
) |
|
|
|
|
return data_frame |
|
|
|
@ -117,10 +109,10 @@ def list_csv_files(folder):
@@ -117,10 +109,10 @@ def list_csv_files(folder):
|
|
|
|
|
|
|
|
|
|
def _sanity_check(data_frame):
    """Check some basic constraints of a combined data frame.

    The number of rows must equal the product of the number of distinct
    well rows, well columns, exposure ids and spot position ids found in
    the data frame. In other words: every combination of those values is
    expected to contribute exactly one row.

    :param data_frame: combined pandas data frame of all parsed files
    :return: the unchanged data frame, if the check passes
    :raises ValueError: if the number of rows differs from the expectation
    """
    # Each count is computed once; earlier duplicate calculations were
    # overwritten before being used.
    field_rows = len(data_frame[META_DATA_WELL_ROW].unique())
    field_cols = len(data_frame[META_DATA_WELL_COLUMN].unique())
    exposures = len(data_frame[META_DATA_EXPOSURE_ID].unique())
    spot_positions = len(data_frame[RAW_DATA_POS_ID].unique())

    expected_rows = field_rows * field_cols * exposures * spot_positions
    if expected_rows != len(data_frame):
        raise ValueError("Measurements are missing")

    # `parse_folder` returns the result of this function, so the checked
    # data frame has to be handed back to the caller.
    return data_frame
|
|
|
@ -129,7 +121,7 @@ def _sanity_check(data_frame):
@@ -129,7 +121,7 @@ def _sanity_check(data_frame):
|
|
|
|
|
|
|
|
|
|
def parse_folder(folder):
    """Parse all csv files in a folder into one large data frame.

    The files reported by ``list_csv_files`` are parsed and combined by
    ``parse_multiple_files``, the result is passed through
    ``add_optional_measurement_parameters`` and finally validated by
    ``_sanity_check``.

    :param folder: path of the folder to parse, anything ``Path`` accepts
    :return: whatever ``_sanity_check`` returns for the combined data frame
    :raises ValueError: propagated from ``_sanity_check`` if measurements
        are missing
    """
    # The folder is listed exactly once; a second, unused listing was removed.
    file_list = list_csv_files(Path(folder))
    data_frame = parse_multiple_files(file_list)
    data_frame = add_optional_measurement_parameters(data_frame, folder)
    return _sanity_check(data_frame)
|
|
|
|