|
|
@ -13,9 +13,9 @@ from .columns import ( |
|
|
|
RAW_DATA_POS_ID, |
|
|
|
RAW_DATA_POS_ID, |
|
|
|
META_DATA_WELL_ROW, |
|
|
|
META_DATA_WELL_ROW, |
|
|
|
META_DATA_WELL_NAME, |
|
|
|
META_DATA_WELL_NAME, |
|
|
|
PARSED_DATA_COLUMN_SET, |
|
|
|
|
|
|
|
META_DATA_EXPOSURE_ID, |
|
|
|
META_DATA_EXPOSURE_ID, |
|
|
|
META_DATA_WELL_COLUMN, |
|
|
|
META_DATA_WELL_COLUMN, |
|
|
|
|
|
|
|
PARSED_DATA_COLUMN_SET, |
|
|
|
RAW_DATA_NORMALIZATION_MAP, |
|
|
|
RAW_DATA_NORMALIZATION_MAP, |
|
|
|
RAW_DATA_COLUMNS_RENAME_MAP, |
|
|
|
RAW_DATA_COLUMNS_RENAME_MAP, |
|
|
|
) |
|
|
|
) |
|
|
@ -33,7 +33,7 @@ FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _guess_decimal_separator(file_handle): |
|
|
|
def _guess_decimal_separator(file_handle): |
|
|
|
""" guesses the decimal spearator of a opened data file """ |
|
|
|
"""guesses the decimal spearator of a opened data file""" |
|
|
|
file_handle.seek(0) |
|
|
|
file_handle.seek(0) |
|
|
|
headers = next(file_handle) # noqa: F841 |
|
|
|
headers = next(file_handle) # noqa: F841 |
|
|
|
data = next(file_handle) |
|
|
|
data = next(file_handle) |
|
|
@ -43,7 +43,7 @@ def _guess_decimal_separator(file_handle): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_csv(data_file): |
|
|
|
def _parse_csv(data_file): |
|
|
|
""" parse a csv sensovation data file """ |
|
|
|
"""parse a csv sensovation data file""" |
|
|
|
data_path = Path(data_file) |
|
|
|
data_path = Path(data_file) |
|
|
|
with data_path.open("r") as handle: |
|
|
|
with data_path.open("r") as handle: |
|
|
|
decimal_sep = _guess_decimal_separator(handle) |
|
|
|
decimal_sep = _guess_decimal_separator(handle) |
|
|
@ -51,7 +51,7 @@ def _parse_csv(data_file): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_measurement_info(data_file): |
|
|
|
def _extract_measurement_info(data_file): |
|
|
|
""" extract measurement meta data from a file name """ |
|
|
|
"""extract measurement meta data from a file name""" |
|
|
|
data_path = Path(data_file) |
|
|
|
data_path = Path(data_file) |
|
|
|
*rest, well, exposure = data_path.stem.rsplit("_", 2) # noqa: F841 |
|
|
|
*rest, well, exposure = data_path.stem.rsplit("_", 2) # noqa: F841 |
|
|
|
matched = REGEX_WELL.match(well) |
|
|
|
matched = REGEX_WELL.match(well) |
|
|
@ -64,7 +64,7 @@ def _extract_measurement_info(data_file): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _cleanup_data_columns(data_frame): |
|
|
|
def _cleanup_data_columns(data_frame): |
|
|
|
""" renames some data columns for consistency and drops unused columns """ |
|
|
|
"""renames some data columns for consistency and drops unused columns""" |
|
|
|
renamed = data_frame.rename(columns=RAW_DATA_COLUMNS_RENAME_MAP) |
|
|
|
renamed = data_frame.rename(columns=RAW_DATA_COLUMNS_RENAME_MAP) |
|
|
|
surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET |
|
|
|
surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET |
|
|
|
return renamed.drop(columns=surplus_columns) |
|
|
|
return renamed.drop(columns=surplus_columns) |
|
|
@ -99,7 +99,7 @@ def _silenced_parse_file(data_file): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_multiple_files(file_list): |
|
|
|
def parse_multiple_files(file_list): |
|
|
|
""" parses a list of file paths to one combined dataframe """ |
|
|
|
"""parses a list of file paths to one combined dataframe""" |
|
|
|
if not file_list: |
|
|
|
if not file_list: |
|
|
|
raise ValueError("Empty file list provided") |
|
|
|
raise ValueError("Empty file list provided") |
|
|
|
collection = (_silenced_parse_file(path) for path in file_list) |
|
|
|
collection = (_silenced_parse_file(path) for path in file_list) |
|
|
@ -114,7 +114,7 @@ def parse_multiple_files(file_list): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_csv_files(folder): |
|
|
|
def list_csv_files(folder): |
|
|
|
""" returns all csv files in a folder """ |
|
|
|
"""returns all csv files in a folder""" |
|
|
|
folder_path = Path(folder) |
|
|
|
folder_path = Path(folder) |
|
|
|
files = (item for item in folder_path.iterdir() if item.is_file()) |
|
|
|
files = (item for item in folder_path.iterdir() if item.is_file()) |
|
|
|
visible = (item for item in files if not item.stem.startswith(".")) |
|
|
|
visible = (item for item in files if not item.stem.startswith(".")) |
|
|
@ -122,7 +122,7 @@ def list_csv_files(folder): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sanity_check(data_frame): |
|
|
|
def _sanity_check(data_frame): |
|
|
|
""" checks some basic constrains of a combined data frame """ |
|
|
|
"""checks some basic constrains of a combined data frame""" |
|
|
|
field_rows = len(data_frame[META_DATA_WELL_ROW].unique()) |
|
|
|
field_rows = len(data_frame[META_DATA_WELL_ROW].unique()) |
|
|
|
field_cols = len(data_frame[META_DATA_WELL_COLUMN].unique()) |
|
|
|
field_cols = len(data_frame[META_DATA_WELL_COLUMN].unique()) |
|
|
|
exposures = len(data_frame[META_DATA_EXPOSURE_ID].unique()) |
|
|
|
exposures = len(data_frame[META_DATA_EXPOSURE_ID].unique()) |
|
|
@ -139,7 +139,7 @@ def _sanity_check(data_frame): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_folder(folder, quiet=False): |
|
|
|
def parse_folder(folder, quiet=False): |
|
|
|
""" parses all csv files in a folder to one large dataframe """ |
|
|
|
"""parses all csv files in a folder to one large dataframe""" |
|
|
|
file_list = list_csv_files(Path(folder)) |
|
|
|
file_list = list_csv_files(Path(folder)) |
|
|
|
data_frame = parse_multiple_files(file_list) |
|
|
|
data_frame = parse_multiple_files(file_list) |
|
|
|
data_frame = add_optional_measurement_parameters(data_frame, folder) |
|
|
|
data_frame = add_optional_measurement_parameters(data_frame, folder) |
|
|
|