|
|
@ -16,7 +16,8 @@ from .columns import ( |
|
|
|
META_DATA_EXPOSURE_ID, |
|
|
|
META_DATA_EXPOSURE_ID, |
|
|
|
META_DATA_WELL_COLUMN, |
|
|
|
META_DATA_WELL_COLUMN, |
|
|
|
PARSED_DATA_COLUMN_SET, |
|
|
|
PARSED_DATA_COLUMN_SET, |
|
|
|
RAW_DATA_NORMALIZATION_MAP, |
|
|
|
META_DATA_ANALYSIS_NAME, |
|
|
|
|
|
|
|
RAW_DATA_NUMERIC_COLUMNS, |
|
|
|
RAW_DATA_COLUMNS_RENAME_MAP, |
|
|
|
RAW_DATA_COLUMNS_RENAME_MAP, |
|
|
|
) |
|
|
|
) |
|
|
|
from .parameters import add_optional_measurement_parameters |
|
|
|
from .parameters import add_optional_measurement_parameters |
|
|
@ -75,8 +76,9 @@ def parse_file(data_file): |
|
|
|
|
|
|
|
|
|
|
|
will raise a ValueError, if metadata could not be extracted |
|
|
|
will raise a ValueError, if metadata could not be extracted |
|
|
|
""" |
|
|
|
""" |
|
|
|
measurement_info = _extract_measurement_info(Path(data_file)) |
|
|
|
data_path = Path(data_file).resolve() |
|
|
|
data_frame = _parse_csv(data_file) |
|
|
|
measurement_info = _extract_measurement_info(data_path) |
|
|
|
|
|
|
|
data_frame = _parse_csv(data_path) |
|
|
|
# normalized well name |
|
|
|
# normalized well name |
|
|
|
data_frame[ |
|
|
|
data_frame[ |
|
|
|
META_DATA_WELL_NAME |
|
|
|
META_DATA_WELL_NAME |
|
|
@ -84,6 +86,7 @@ def parse_file(data_file): |
|
|
|
data_frame[META_DATA_WELL_ROW] = measurement_info.row |
|
|
|
data_frame[META_DATA_WELL_ROW] = measurement_info.row |
|
|
|
data_frame[META_DATA_WELL_COLUMN] = measurement_info.column |
|
|
|
data_frame[META_DATA_WELL_COLUMN] = measurement_info.column |
|
|
|
data_frame[META_DATA_EXPOSURE_ID] = measurement_info.exposure |
|
|
|
data_frame[META_DATA_EXPOSURE_ID] = measurement_info.exposure |
|
|
|
|
|
|
|
data_frame[META_DATA_ANALYSIS_NAME] = data_path.parent.name |
|
|
|
return _cleanup_data_columns(data_frame) |
|
|
|
return _cleanup_data_columns(data_frame) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -133,16 +136,17 @@ def _sanity_check(data_frame): |
|
|
|
f"Measurements are missing: {expected_rows} != {len(data_frame)}" |
|
|
|
f"Measurements are missing: {expected_rows} != {len(data_frame)}" |
|
|
|
) |
|
|
|
) |
|
|
|
# set the right data type for measurement columns |
|
|
|
# set the right data type for measurement columns |
|
|
|
for raw_column in RAW_DATA_NORMALIZATION_MAP: |
|
|
|
for raw_column in RAW_DATA_NUMERIC_COLUMNS: |
|
|
|
data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column]) |
|
|
|
data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column]) |
|
|
|
return data_frame |
|
|
|
return data_frame |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_folder(folder, quiet=False): |
|
|
|
def parse_folder(folder, quiet=False): |
|
|
|
"""parses all csv files in a folder to one large dataframe""" |
|
|
|
"""parses all csv files in a folder to one large dataframe""" |
|
|
|
file_list = list_csv_files(Path(folder)) |
|
|
|
folder_path = Path(folder) |
|
|
|
|
|
|
|
file_list = list_csv_files(folder_path) |
|
|
|
data_frame = parse_multiple_files(file_list) |
|
|
|
data_frame = parse_multiple_files(file_list) |
|
|
|
data_frame = add_optional_measurement_parameters(data_frame, folder) |
|
|
|
data_frame = add_optional_measurement_parameters(data_frame, folder_path) |
|
|
|
if quiet: |
|
|
|
if quiet: |
|
|
|
return data_frame |
|
|
|
return data_frame |
|
|
|
return _sanity_check(data_frame) |
|
|
|
return _sanity_check(data_frame) |
|
|
|