|
|
|
@ -17,6 +17,7 @@ from .columns import (
@@ -17,6 +17,7 @@ from .columns import (
|
|
|
|
|
COL_NAME_EXPOSURE_ID, |
|
|
|
|
COL_NAME_WELL_COLUMN, |
|
|
|
|
COL_NAME_SPOT_DIAMETER, |
|
|
|
|
COLUMNS_RENAME_MAP |
|
|
|
|
) |
|
|
|
|
from .parameters import add_optional_measurement_parameters |
|
|
|
|
|
|
|
|
@ -28,25 +29,15 @@ REGEX_WELL = re.compile(
@@ -28,25 +29,15 @@ REGEX_WELL = re.compile(
|
|
|
|
|
re.VERBOSE | re.IGNORECASE, |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# Column labels that are discarded from a parsed data frame.
# NOTE(review): the consumer is outside this view (presumably
# _cleanup_data_columns) - confirm before changing entries.
COLUMNS_TO_DROP = ["Rect.", "Contour", "Id", "Name", "Foo"]

# Maps raw column labels to the project's COL_NAME_* column names.
# The padded " ID " key is kept exactly as written; presumably it matches
# the raw header verbatim - confirm.
# NOTE(review): COLUMNS_RENAME_MAP is also listed in the
# `from .columns import (...)` statement above, so this assignment rebinds
# that name - confirm which definition is meant to win.
COLUMNS_RENAME_MAP = {
    " ID ": COL_NAME_POS_ID,
    "Found": COL_NAME_SPOT_FOUND,
    "Dia.": COL_NAME_SPOT_DIAMETER,
}
|
|
|
|
|
|
|
|
|
# File name of the hdf5 cache that process_folder reads from and writes to
# inside a data folder.
CACHE_FILE_NAME = "raw_data.h5"

# Lightweight record with the fields row, column and exposure.
# NOTE(review): presumably the return type of _extract_measurement_info,
# whose body is outside this view - confirm.
FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_cache_table_name():
    """Return the hdf5 table name used for the cached data.

    The name is the package level VERSION_TABLE_NAME. It is imported inside
    the function body instead of at module level to avoid a circular import.
    """
    # deferred on purpose: a module level import would be circular
    from . import VERSION_TABLE_NAME

    return VERSION_TABLE_NAME
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _guess_decimal_separator(file_handle): |
|
|
|
|
""" guesses the decimal spearator of a opened data file """ |
|
|
|
|
file_handle.seek(0) |
|
|
|
@ -85,12 +76,15 @@ def _cleanup_data_columns(data_frame):
@@ -85,12 +76,15 @@ def _cleanup_data_columns(data_frame):
|
|
|
|
|
return renamed.drop(columns=surplus_columns) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_file(data_file): |
|
|
|
|
def parse_file(data_file, silent=False): |
|
|
|
|
""" parses one data file and adds metadata to result """ |
|
|
|
|
try: |
|
|
|
|
measurement_info = _extract_measurement_info(Path(data_file)) |
|
|
|
|
except ValueError as e: |
|
|
|
|
return None |
|
|
|
|
if silent: |
|
|
|
|
return None |
|
|
|
|
else: |
|
|
|
|
raise e |
|
|
|
|
data_frame = _parse_csv(data_file) |
|
|
|
|
data_frame[COL_NAME_WELL_ROW] = measurement_info.row |
|
|
|
|
data_frame[COL_NAME_WELL_COLUMN] = measurement_info.column |
|
|
|
@ -102,7 +96,7 @@ def parse_multiple_files(file_list):
@@ -102,7 +96,7 @@ def parse_multiple_files(file_list):
|
|
|
|
|
""" parses a list of file paths to one combined dataframe """ |
|
|
|
|
if not file_list: |
|
|
|
|
raise ValueError("Empty file list provided") |
|
|
|
|
collection = (parse_file(path) for path in file_list) |
|
|
|
|
collection = (parse_file(path, silent=True) for path in file_list) |
|
|
|
|
filtered = (frame for frame in collection if frame is not None) |
|
|
|
|
data_frame = next(filtered) |
|
|
|
|
for next_frame in filtered: |
|
|
|
@ -113,7 +107,7 @@ def parse_multiple_files(file_list):
@@ -113,7 +107,7 @@ def parse_multiple_files(file_list):
|
|
|
|
|
return data_frame |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _list_csv_files(folder): |
|
|
|
|
def list_csv_files(folder): |
|
|
|
|
""" returns all csv files in a folder """ |
|
|
|
|
folder_path = Path(folder) |
|
|
|
|
files = (item for item in folder_path.iterdir() if item.is_file()) |
|
|
|
@ -135,29 +129,7 @@ def _sanity_check(data_frame):
@@ -135,29 +129,7 @@ def _sanity_check(data_frame):
|
|
|
|
|
|
|
|
|
|
def parse_folder(folder):
    """Parse all csv files in a folder into one large data frame.

    The csv files found in `folder` are parsed and combined, the optional
    measurement parameters for the folder are added and the result is passed
    through the sanity check before it is returned.

    Exceptions raised by the helpers (e.g. the ValueError that
    parse_multiple_files raises for an empty file list) are not handled here.
    """
    # a single listing bound to the name the next line actually reads
    file_list = list_csv_files(Path(folder))
    data_frame = parse_multiple_files(file_list)
    data_frame = add_optional_measurement_parameters(data_frame, folder)
    return _sanity_check(data_frame)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_folder(folder, use_cache=True):
    """Parse all csv files in a folder, using an hdf5 cache if allowed.

    With `use_cache` enabled, the cache file CACHE_FILE_NAME inside `folder`
    is tried first and its table is returned directly if it can be read.
    Otherwise the folder is parsed with parse_folder and, again only with
    `use_cache` enabled, the result is written back to the cache file.
    Caching is best effort: a missing cache or a failed write is ignored.

    Returns the data frame, either read from the cache or freshly parsed.
    """
    hdf5_path = Path(folder) / CACHE_FILE_NAME

    if use_cache:
        try:
            return pandas.read_hdf(hdf5_path, _get_cache_table_name())
        except (FileNotFoundError, KeyError):
            # either file or table doesn't exist
            pass

    data_frame = parse_folder(folder)

    if use_cache:
        try:
            # key is passed by keyword: newer pandas versions deprecate
            # positional arguments after the path
            data_frame.to_hdf(
                hdf5_path, key=_get_cache_table_name(), format="table"
            )
        except OSError:
            # capturing high level OSError
            # read only filesystems don't throw a more specific exception
            pass

    return data_frame
|
|
|
|