diff --git a/src/sensospot_tools/hdr.py b/src/sensospot_tools/hdr.py index 7bb4ecf..3214dc5 100644 --- a/src/sensospot_tools/hdr.py +++ b/src/sensospot_tools/hdr.py @@ -12,7 +12,7 @@ def select_hdr_data( time_column: str, overflow_column: str, ) -> pandas.DataFrame: - """selects the data for increased dynamic measurement range + """Selects the data for increased dynamic measurement range To increase the dynamic range of a measurement, multiple exposures of one microarray might be taken. @@ -35,10 +35,17 @@ def select_hdr_data( The function will raise a KeyError if any of the provided column names is not present in the data frame - spot_id_columns: column names identifying a spot - time_column: column name for the (nominal) exposure time - overflow_column: column name holding a overflow test result - returns: data frame with selected hdr data per spot + Args: + data: data with multiple exposure times + spot_id_columns: column names identifying a spot + time_column: column name for the (nominal) exposure time + overflow_column: column name holding an overflow test result + + Returns: + a data frame with selected hdr data per spot + + Raises: + KeyError: if any column does not exist in the data frame """ check_columns_exist(data, spot_id_columns, time_column, overflow_column) @@ -68,20 +75,27 @@ def normalize( value_columns: Union[list[str], str], template: str = "Normalized.{}", ) -> pandas.DataFrame: - """normalizes values to a normalized exposure time + """Normalizes values to a normalized exposure time. Will raise a KeyError, if any column is not in the data frame; raises ValueError if no template string was provided. 
- data: data frame to normalize - normalized_time: exposure time to normalize to - time_column: column name of the (nominal) exposure time - value_columns: which columns to normalize - template: a Python template string for the normalized column names - returns: copy of the data with additional normalized values + Args: + data: data frame to normalize + normalized_time: exposure time to normalize to + time_column: column name of the (nominal) exposure time + value_columns: which columns to normalize + template: a template string for the normalized column names + + Returns: + copy of the data with additional normalized values + + Raises: + KeyError: if any column is not in the data frame + ValueError: if the value for `template` is not a template string """ check_columns_exist(data, time_column, value_columns) - if "{}" not in template: + if template == template.format("a"): raise ValueError(f"Not a template string: '{template}'") data = data.copy() diff --git a/src/sensospot_tools/helpers.py b/src/sensospot_tools/helpers.py index 80c1197..623e6e3 100644 --- a/src/sensospot_tools/helpers.py +++ b/src/sensospot_tools/helpers.py @@ -10,17 +10,21 @@ def ensure_list(something: Any) -> list[Any]: This is intended to use so that where column names should be provided as a list could also be provided as a single column name - >>> ensure_list("abc") - ["abc"] + Examples: + >>> ensure_list("abc") + ["abc"] - >>> ensure_list({"a", "b"}) - ["a", "b"] + >>> ensure_list({"a", "b"}) + ["a", "b"] - >>> ensure_list(1) - [1] + >>> ensure_list(1) + [1] - something: the value to be in or the list - returns: a list of whatever something is + Args: + something: the value to be in or the list + + Returns: + a list of whatever something is """ # strings are iterables, so here is a special case for them if isinstance(something, str): @@ -35,8 +39,15 @@ def ensure_list(something: Any) -> list[Any]: def check_columns_exist(data: pandas.DataFrame, *arguments) -> bool: """raises KeyError if 
columns dont exist in a data frame - data : the pandas DataFrame to check for - *arguments : variatic number of columns or lists of columns to check + Args: + data : the pandas DataFrame to check + *arguments : variadic number of columns or lists of columns to check + + Returns: + True if all columns exist in the data frame + + Raises: + KeyError: if any column does not exist in the data frame """ argument_items_as_lists = (ensure_list(arg) for arg in arguments) check_cols = set(itertools.chain.from_iterable(argument_items_as_lists)) diff --git a/src/sensospot_tools/selection.py b/src/sensospot_tools/selection.py index ebd26a2..e5a3aa5 100644 --- a/src/sensospot_tools/selection.py +++ b/src/sensospot_tools/selection.py @@ -8,24 +8,26 @@ def select( ) -> pandas.DataFrame: """Selects rows of a dataframe based on a value in a column - Example: - >>> print(data) + Examples: + >>> print(data) category value 0 dog 1 1 cat 2 2 horse 3 3 cat 4 - - >>> print(select(data, "category", "cat")) + >>> print(select(data, "category", "cat")) category value 1 cat 2 3 cat 4 - data: a data DataFrame to select from - column: name of a column in a dataframe - value: rows with this value in the column will be selected - returns: a copy of the DataFrame that has the value in the column + Args: + data: a data DataFrame to select from + column: name of a column in a dataframe + value: rows with this value in the column will be selected + + Returns: + a copy of the DataFrame that has the value in the column """ selector = data[column] == value return data.loc[selector].copy() @@ -40,35 +42,32 @@ def split( unique value used for the split, the value is a slice of the dataframe selected by the unique value contained in the column - Example: + Examples: - >>> print(data) + >>> print(data) category value 0 dog 1 1 cat 2 2 horse 3 3 cat 4 - - >>> result = dict( split(data, column="category") ) - - >>> print(result["dog"]) + >>> result = dict( split(data, column="category") ) + >>> 
print(result["dog"]) category value 0 dog 1 - - >>> print(result["cat"]) + >>> print(result["cat"]) category value 1 cat 2 3 cat 4 - - >>> print(result["horse"]) + >>> print(result["horse"]) category value 2 horse 3 - data: DataFrame to process - column: column identifier to split on unique values - yields: key-value-pairs of - keys: one unique value - values: slice of the dataframe that contains the unique value + Args: + data: DataFrame to process + column: column identifier to split on unique values + Yields: + key-value-pairs of one unique value of the column as key and the + corresponding slice of the dataframe as value """ unique_values = data[column].unique() return ((value, select(data, column, value)) for value in unique_values)