|
|
@ -32,8 +32,11 @@ def _guess_decimal_separator(file_handle: TextIO) -> str: |
|
|
|
This is a very crude method, but depending on the language setting, |
|
|
|
This is a very crude method, but depending on the language setting, |
|
|
|
different decimal separators may be used. |
|
|
|
different decimal separators may be used. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
file_handle: a file handle to an opened csv file |
|
|
|
file_handle: a file handle to an opened csv file |
|
|
|
returns: either '.' or ',' as a decimal separator |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
either '.' or ',' as a decimal separator |
|
|
|
""" |
|
|
|
""" |
|
|
|
file_handle.seek(0) |
|
|
|
file_handle.seek(0) |
|
|
|
headers = next(file_handle) # noqa: F841 |
|
|
|
headers = next(file_handle) # noqa: F841 |
|
|
@ -48,8 +51,11 @@ def _parse_csv(data_file: PathLike) -> pandas.DataFrame: |
|
|
|
|
|
|
|
|
|
|
|
Tries to guess the decimal separator from the file contents |
|
|
|
Tries to guess the decimal separator from the file contents |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
data_file: path to the csv file |
|
|
|
data_file: path to the csv file |
|
|
|
returns: pandas DataFrame with the parsed data |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
pandas data frame with the parsed data |
|
|
|
""" |
|
|
|
""" |
|
|
|
data_path = pathlib.Path(data_file) |
|
|
|
data_path = pathlib.Path(data_file) |
|
|
|
with data_path.open("r") as handle: |
|
|
|
with data_path.open("r") as handle: |
|
|
@ -61,8 +67,11 @@ def _parse_csv(data_file: PathLike) -> pandas.DataFrame: |
|
|
|
def _extract_measurement_info(data_file: PathLike) -> FileInfo: |
|
|
|
def _extract_measurement_info(data_file: PathLike) -> FileInfo: |
|
|
|
"""extract measurement meta data from a file name |
|
|
|
"""extract measurement meta data from a file name |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
data_file: path to the csv data file |
|
|
|
data_file: path to the csv data file |
|
|
|
returns: named tuple FileInfo with parsed metadata |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
named tuple FileInfo with parsed metadata |
|
|
|
""" |
|
|
|
""" |
|
|
|
data_path = pathlib.Path(data_file) |
|
|
|
data_path = pathlib.Path(data_file) |
|
|
|
*rest, well, exposure = data_path.stem.rsplit("_", 2) # noqa: F841 |
|
|
|
*rest, well, exposure = data_path.stem.rsplit("_", 2) # noqa: F841 |
|
|
@ -78,8 +87,11 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo: |
|
|
|
def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame: |
|
|
|
def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame: |
|
|
|
"""renames some data columns for consistency and drops unused columns |
|
|
|
"""renames some data columns for consistency and drops unused columns |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
data_frame: pandas DataFrame with parsed measurement data |
|
|
|
data_frame: pandas DataFrame with parsed measurement data |
|
|
|
returns: pandas DataFrame, column names cleaned up |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
pandas DataFrame, column names cleaned up |
|
|
|
""" |
|
|
|
""" |
|
|
|
renamed = data_frame.rename(columns=columns.CSV_RENAME_MAP) |
|
|
|
renamed = data_frame.rename(columns=columns.CSV_RENAME_MAP) |
|
|
|
surplus_columns = set(renamed.columns) - columns.PARSED_DATA_COLUMN_SET |
|
|
|
surplus_columns = set(renamed.columns) - columns.PARSED_DATA_COLUMN_SET |
|
|
@ -91,9 +103,14 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame: |
|
|
|
|
|
|
|
|
|
|
|
will raise a ValueError, if metadata could not be extracted |
|
|
|
will raise a ValueError, if metadata could not be extracted |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
data_file: path to the csv data file |
|
|
|
data_file: path to the csv data file |
|
|
|
raises: ValueError if metadata could not be extracted |
|
|
|
|
|
|
|
returns: pandas DataFrame with the parsed data |
|
|
|
Returns: |
|
|
|
|
|
|
|
pandas data frame with the parsed data |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Raises: |
|
|
|
|
|
|
|
ValueError: if metadata could not be extracted |
|
|
|
""" |
|
|
|
""" |
|
|
|
data_path = pathlib.Path(data_file).resolve() |
|
|
|
data_path = pathlib.Path(data_file).resolve() |
|
|
|
measurement_info = _extract_measurement_info(data_path) |
|
|
|
measurement_info = _extract_measurement_info(data_path) |
|
|
@ -112,8 +129,13 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame: |
|
|
|
def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]: |
|
|
|
def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]: |
|
|
|
"""parses one data file and adds metadata |
|
|
|
"""parses one data file and adds metadata |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Safety checks are suppressed |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
data_file: path to the csv data file |
|
|
|
data_file: path to the csv data file |
|
|
|
returns: pandas DataFrame with the parsed data or None on error |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
pandas data frame with the parsed data or None on error |
|
|
|
""" |
|
|
|
""" |
|
|
|
try: |
|
|
|
try: |
|
|
|
return parse_file(data_file) |
|
|
|
return parse_file(data_file) |
|
|
@ -124,8 +146,10 @@ def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]: |
|
|
|
def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame: |
|
|
|
def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame: |
|
|
|
"""parses a list of file paths to one combined data frame |
|
|
|
"""parses a list of file paths to one combined data frame |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
file_list: collection of paths to csv data files |
|
|
|
file_list: collection of paths to csv data files |
|
|
|
returns: pandas DataFrame with all parsed data combined |
|
|
|
Returns: |
|
|
|
|
|
|
|
pandas data frame with all parsed data combined |
|
|
|
""" |
|
|
|
""" |
|
|
|
if not file_list: |
|
|
|
if not file_list: |
|
|
|
raise ValueError("Empty file list provided") |
|
|
|
raise ValueError("Empty file list provided") |
|
|
@ -141,8 +165,11 @@ def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame: |
|
|
|
def find_csv_files(folder: PathLike) -> Sequence[pathlib.Path]: |
|
|
|
def find_csv_files(folder: PathLike) -> Sequence[pathlib.Path]: |
|
|
|
"""returns all csv files in a folder |
|
|
|
"""returns all csv files in a folder |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
folder: path to the folder to search for csv files |
|
|
|
folder: path to the folder to search for csv files |
|
|
|
returns: iterator with the found csv files |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
iterator with the found csv files |
|
|
|
""" |
|
|
|
""" |
|
|
|
folder_path = pathlib.Path(folder) |
|
|
|
folder_path = pathlib.Path(folder) |
|
|
|
files = (item for item in folder_path.iterdir() if item.is_file()) |
|
|
|
files = (item for item in folder_path.iterdir() if item.is_file()) |
|
|
@ -153,9 +180,14 @@ def find_csv_files(folder: PathLike) -> Sequence[pathlib.Path]: |
|
|
|
def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame: |
|
|
|
def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame: |
|
|
|
"""checks some basic constraints of a combined data frame |
|
|
|
"""checks some basic constraints of a combined data frame |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
data_frame: measurement data |
|
|
|
data_frame: measurement data |
|
|
|
raises: ValueError if basic constraints are not met |
|
|
|
|
|
|
|
returns: pandas DataFrame |
|
|
|
Returns: |
|
|
|
|
|
|
|
a pandas DataFrame |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Raises: |
|
|
|
|
|
|
|
ValueError: if basic constraints are not met |
|
|
|
""" |
|
|
|
""" |
|
|
|
field_rows = len(data_frame[columns.WELL_ROW].unique()) |
|
|
|
field_rows = len(data_frame[columns.WELL_ROW].unique()) |
|
|
|
field_cols = len(data_frame[columns.WELL_COLUMN].unique()) |
|
|
|
field_cols = len(data_frame[columns.WELL_COLUMN].unique()) |
|
|
@ -178,9 +210,12 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame: |
|
|
|
Will raise a ValueError, if no sensospot data could be found in |
|
|
|
Will raise a ValueError, if no sensospot data could be found in |
|
|
|
the folder |
|
|
|
the folder |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
folder: path of folder containing data files |
|
|
|
folder: path of folder containing data files |
|
|
|
quiet: skip sanity check, defaults to False |
|
|
|
quiet: skip sanity check, defaults to False |
|
|
|
returns: pandas dataframe with parsed data |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
|
|
a pandas data frame with parsed data |
|
|
|
""" |
|
|
|
""" |
|
|
|
folder_path = pathlib.Path(folder) |
|
|
|
folder_path = pathlib.Path(folder) |
|
|
|
file_list = find_csv_files(folder_path) |
|
|
|
file_list = find_csv_files(folder_path) |
|
|
|