Browse Source

updated docstrings for documentation with mkdocs

main
Holger Frey 2 years ago
parent
commit
9747de7d69
  1. 40
      src/sensospot_tools/hdr.py
  2. 31
      src/sensospot_tools/helpers.py
  3. 45
      src/sensospot_tools/selection.py

40
src/sensospot_tools/hdr.py

@ -12,7 +12,7 @@ def select_hdr_data( @@ -12,7 +12,7 @@ def select_hdr_data(
time_column: str,
overflow_column: str,
) -> pandas.DataFrame:
"""selects the data for increased dynamic measurement range
"""Selects the data for increased dynamic measurement range
To increase the dynamic range of a measurement, multiple exposures of one
microarray might be taken.
@ -35,10 +35,17 @@ def select_hdr_data( @@ -35,10 +35,17 @@ def select_hdr_data(
The function will raise a KeyError if any of the provided column names
is not present in the data frame
spot_id_columns: column names identifying a spot
time_column: column name for the (nominal) exposure time
overflow_column: column name holding a overflow test result
returns: data frame with selected hdr data per spot
Args:
data: data with multiple exposure times
spot_id_columns: column names identifying a spot
time_column: column name for the (nominal) exposure time
overflow_column: column name holding an overflow test result
Returns:
a data frame with selected hdr data per spot
Raises:
KeyError: if any column does not exist in the data frame
"""
check_columns_exist(data, spot_id_columns, time_column, overflow_column)
@ -68,20 +75,27 @@ def normalize( @@ -68,20 +75,27 @@ def normalize(
value_columns: Union[list[str], str],
template: str = "Normalized.{}",
) -> pandas.DataFrame:
"""normalizes values to a normalized exposure time
"""Normalizes values to a normalized exposure time.
Will raise a KeyError, if any column is not in the data frame;
raises ValueError if no template string was provided.
data: data frame to normalize
normalized_time: exposure time to normalize to
time_column: column name of the (nominal) exposure time
value_columns: which columns to normalize
template: a Python template string for the normalized column names
returns: copy of the data with additional normalized values
Args:
data: data frame to normalize
normalized_time: exposure time to normalize to
time_column: column name of the (nominal) exposure time
value_columns: which columns to normalize
template: a template string for the normalized column names
Returns:
copy of the data with additional normalized values
Raises:
KeyError: if any column is not in the data frame
ValueError: if the value for `template` is not a template string
"""
check_columns_exist(data, time_column, value_columns)
if "{}" not in template:
if template == template.format("a"):
raise ValueError(f"Not a template string: '{template}'")
data = data.copy()

31
src/sensospot_tools/helpers.py

@ -10,17 +10,21 @@ def ensure_list(something: Any) -> list[Any]: @@ -10,17 +10,21 @@ def ensure_list(something: Any) -> list[Any]:
This is intended so that arguments expecting a list of column names
can also be given a single column name
>>> ensure_list("abc")
["abc"]
Examples:
>>> ensure_list("abc")
["abc"]
>>> ensure_list({"a", "b"})
["a", "b"]
>>> ensure_list({"a", "b"})
["a", "b"]
>>> ensure_list(1)
[1]
>>> ensure_list(1)
[1]
something: the value to be in or the list
returns: a list of whatever something is
Args:
something: the value to wrap in a list, or an iterable to convert to one
Returns:
a list of whatever something is
"""
# strings are iterables, so here is a special case for them
if isinstance(something, str):
@ -35,8 +39,15 @@ def ensure_list(something: Any) -> list[Any]: @@ -35,8 +39,15 @@ def ensure_list(something: Any) -> list[Any]:
def check_columns_exist(data: pandas.DataFrame, *arguments) -> bool:
"""Raises KeyError if columns don't exist in a data frame
data : the pandas DataFrame to check for
*arguments : variatic number of columns or lists of columns to check
Args:
data : the pandas DataFrame to check
*arguments : variadic number of columns or lists of columns to check
Returns:
True if all columns exist in the data frame
Raises:
KeyError: if any column does not exist in the data frame
"""
argument_items_as_lists = (ensure_list(arg) for arg in arguments)
check_cols = set(itertools.chain.from_iterable(argument_items_as_lists))

45
src/sensospot_tools/selection.py

@ -8,24 +8,26 @@ def select( @@ -8,24 +8,26 @@ def select(
) -> pandas.DataFrame:
"""Selects rows of a dataframe based on a value in a column
Example:
>>> print(data)
Examples:
>>> print(data)
category value
0 dog 1
1 cat 2
2 horse 3
3 cat 4
>>> print(select(data, "category", "cat"))
>>> print(select(data, "category", "cat"))
category value
1 cat 2
3 cat 4
data: a data DataFrame to select from
column: name of a column in a dataframe
value: rows with this value in the column will be selected
returns: a copy of the DataFrame that has the value in the column
Args:
data: a DataFrame to select from
column: name of a column in a dataframe
value: rows with this value in the column will be selected
Returns:
a copy of the DataFrame that has the value in the column
"""
selector = data[column] == value
return data.loc[selector].copy()
@ -40,35 +42,32 @@ def split( @@ -40,35 +42,32 @@ def split(
unique value used for the split, the value is a slice of the dataframe
selected by the unique value contained in the column
Example:
Examples:
>>> print(data)
>>> print(data)
category value
0 dog 1
1 cat 2
2 horse 3
3 cat 4
>>> result = dict( split(data, column="category") )
>>> print(result["dog"])
>>> result = dict( split(data, column="category") )
>>> print(result["dog"])
category value
0 dog 1
>>> print(result["cat"])
>>> print(result["cat"])
category value
1 cat 2
3 cat 4
>>> print(result["horse"])
>>> print(result["horse"])
category value
2 horse 3
data: DataFrame to process
column: column identifier to split on unique values
yields: key-value-pairs of
keys: one unique value
values: slice of the dataframe that contains the unique value
Args:
data: DataFrame to process
column: column identifier to split on unique values
Yields:
key-value-pairs of one unique value of the column as key and the
corresponding slice of the dataframe as value
"""
unique_values = data[column].unique()
return ((value, select(data, column, value)) for value in unique_values)

Loading…
Cancel
Save