Browse Source

updated docstrings for documentation with mkdocs

main
Holger Frey 2 years ago
parent
commit
9747de7d69
  1. 26
      src/sensospot_tools/hdr.py
  2. 15
      src/sensospot_tools/helpers.py
  3. 21
      src/sensospot_tools/selection.py

26
src/sensospot_tools/hdr.py

@ -12,7 +12,7 @@ def select_hdr_data(
time_column: str, time_column: str,
overflow_column: str, overflow_column: str,
) -> pandas.DataFrame: ) -> pandas.DataFrame:
"""selects the data for increased dynamic measurement range """Selects the data for increased dynamic measurement range
To increase the dynamic range of a measurement, multiple exposures of one To increase the dynamic range of a measurement, multiple exposures of one
microarray might be taken. microarray might be taken.
@ -35,10 +35,17 @@ def select_hdr_data(
The function will raise a KeyError if any of the provided column names The function will raise a KeyError if any of the provided column names
is not present in the data frame is not present in the data frame
Args:
data: data with multiple exposure times
spot_id_columns: column names identifying a spot spot_id_columns: column names identifying a spot
time_column: column name for the (nominal) exposure time time_column: column name for the (nominal) exposure time
overflow_column: column name holding an overflow test result overflow_column: column name holding an overflow test result
returns: data frame with selected hdr data per spot
Returns:
a data frame with selected hdr data per spot
Raises:
KeyError: if any column does not exist in the data frame
""" """
check_columns_exist(data, spot_id_columns, time_column, overflow_column) check_columns_exist(data, spot_id_columns, time_column, overflow_column)
@ -68,20 +75,27 @@ def normalize(
value_columns: Union[list[str], str], value_columns: Union[list[str], str],
template: str = "Normalized.{}", template: str = "Normalized.{}",
) -> pandas.DataFrame: ) -> pandas.DataFrame:
"""normalizes values to a normalized exposure time """Normalizes values to a normalized exposure time.
Will raise a KeyError, if any column is not in the data frame; Will raise a KeyError, if any column is not in the data frame;
raises ValueError if no template string was provided. raises ValueError if no template string was provided.
Args:
data: data frame to normalize data: data frame to normalize
normalized_time: exposure time to normalize to normalized_time: exposure time to normalize to
time_column: column name of the (nominal) exposure time time_column: column name of the (nominal) exposure time
value_columns: which columns to normalize value_columns: which columns to normalize
template: a Python template string for the normalized column names template: a template string for the normalized column names
returns: copy of the data with additional normalized values
Returns:
copy of the data with additional normalized values
Raises:
KeyError: if any column is not in the data frame
ValueError: if the value for `template` is not a template string
""" """
check_columns_exist(data, time_column, value_columns) check_columns_exist(data, time_column, value_columns)
if "{}" not in template: if template == template.format("a"):
raise ValueError(f"Not a template string: '{template}'") raise ValueError(f"Not a template string: '{template}'")
data = data.copy() data = data.copy()

15
src/sensospot_tools/helpers.py

@ -10,6 +10,7 @@ def ensure_list(something: Any) -> list[Any]:
This is intended to be used so that column names that should be provided This is intended to be used so that column names that should be provided
as a list can also be provided as a single column name as a list can also be provided as a single column name
Examples:
>>> ensure_list("abc") >>> ensure_list("abc")
["abc"] ["abc"]
@ -19,8 +20,11 @@ def ensure_list(something: Any) -> list[Any]:
>>> ensure_list(1) >>> ensure_list(1)
[1] [1]
Args:
something: the value to be put in a list, or the list itself something: the value to be put in a list, or the list itself
returns: a list of whatever something is
Returns:
a list of whatever something is
""" """
# strings are iterables, so here is a special case for them # strings are iterables, so here is a special case for them
if isinstance(something, str): if isinstance(something, str):
@ -35,8 +39,15 @@ def ensure_list(something: Any) -> list[Any]:
def check_columns_exist(data: pandas.DataFrame, *arguments) -> bool: def check_columns_exist(data: pandas.DataFrame, *arguments) -> bool:
"""raises KeyError if columns don't exist in a data frame """raises KeyError if columns don't exist in a data frame
data : the pandas DataFrame to check for Args:
data : the pandas DataFrame to check
*arguments : variadic number of columns or lists of columns to check *arguments : variadic number of columns or lists of columns to check
Returns:
True if all columns exist in the data frame
Raises:
KeyError: if any column does not exist in the data frame
""" """
argument_items_as_lists = (ensure_list(arg) for arg in arguments) argument_items_as_lists = (ensure_list(arg) for arg in arguments)
check_cols = set(itertools.chain.from_iterable(argument_items_as_lists)) check_cols = set(itertools.chain.from_iterable(argument_items_as_lists))

21
src/sensospot_tools/selection.py

@ -8,24 +8,26 @@ def select(
) -> pandas.DataFrame: ) -> pandas.DataFrame:
"""Selects rows of a dataframe based on a value in a column """Selects rows of a dataframe based on a value in a column
Example: Examples:
>>> print(data) >>> print(data)
category value category value
0 dog 1 0 dog 1
1 cat 2 1 cat 2
2 horse 3 2 horse 3
3 cat 4 3 cat 4
>>> print(select(data, "category", "cat")) >>> print(select(data, "category", "cat"))
category value category value
1 cat 2 1 cat 2
3 cat 4 3 cat 4
Args:
data: a data DataFrame to select from data: a data DataFrame to select from
column: name of a column in a dataframe column: name of a column in a dataframe
value: rows with this value in the column will be selected value: rows with this value in the column will be selected
returns: a copy of the DataFrame that has the value in the column
Returns:
a copy of the DataFrame that has the value in the column
""" """
selector = data[column] == value selector = data[column] == value
return data.loc[selector].copy() return data.loc[selector].copy()
@ -40,7 +42,7 @@ def split(
unique value used for the split, the value is a slice of the dataframe unique value used for the split, the value is a slice of the dataframe
selected by the unique value contained in the column selected by the unique value contained in the column
Example: Examples:
>>> print(data) >>> print(data)
category value category value
@ -48,27 +50,24 @@ def split(
1 cat 2 1 cat 2
2 horse 3 2 horse 3
3 cat 4 3 cat 4
>>> result = dict( split(data, column="category") ) >>> result = dict( split(data, column="category") )
>>> print(result["dog"]) >>> print(result["dog"])
category value category value
0 dog 1 0 dog 1
>>> print(result["cat"]) >>> print(result["cat"])
category value category value
1 cat 2 1 cat 2
3 cat 4 3 cat 4
>>> print(result["horse"]) >>> print(result["horse"])
category value category value
2 horse 3 2 horse 3
Args:
data: DataFrame to process data: DataFrame to process
column: column identifier to split on unique values column: column identifier to split on unique values
yields: key-value-pairs of Yields:
keys: one unique value key-value-pairs of one unique value of the column as key and the
values: slice of the dataframe that contains the unique value corresponding slice of the dataframe as value
""" """
unique_values = data[column].unique() unique_values = data[column].unique()
return ((value, select(data, column, value)) for value in unique_values) return ((value, select(data, column, value)) for value in unique_values)

Loading…
Cancel
Save