diff --git a/Makefile b/Makefile index 99d9b0f..a6d1dab 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: clean clean-test clean-pyc clean-build docs help +.PHONY: clean clean-test clean-pyc clean-build docs help prepareenv .DEFAULT_GOAL := help define BROWSER_PYSCRIPT @@ -82,13 +82,16 @@ tox: ## run fully isolated tests with tox install: ## install updated project.toml with flint flit install --pth-file -devenv: ## setup development environment +prepareenv: ## setup a virtualenv and install basic required packages python3 -m venv --prompt sensospot_tools .venv .venv/bin/pip3 install --upgrade pip .venv/bin/pip3 install "flit>3.2" .venv/bin/flit install --pth-file -repo: devenv ## complete project setup with development environment and git repo +devenv: prepareenv ## setup development environment including pre commit hooks + .venv/bin/pre-commit install --install-hooks + +repo: prepareenv ## complete project setup with development environment and git repo git init . git add . git commit -m "import of project template" diff --git a/README.md b/README.md index 709a6a6..a92b42e 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,65 @@ Sensospot Tools Some small tools for working with parsed Sensospot data. -## Example: +## Selecting and splitting a pandas data frame +### sensospot_tools.select(data: DataFrame, column: str, value: Any) -> DataFrame + +Selects rows of a dataframe based on a value in a column + +Example: +```python + + from sensospot_tools import select + + print(data) + category value + 0 dog 1 + 1 cat 2 + 2 horse 3 + 3 cat 4 + + print(select(data, "category", "cat")) + category value + 1 cat 2 + 3 cat 4 +``` + + +### sensospot_tools.split(data: DataFrame, column: str) -> Iterator[tuple[Any, DataFrame]] + +Splits a data frame on unique values in a column + +Returns an iterator where each result is a key-value pair. 
The key is the +unique value used for the split, the value is a slice of the dataframe +selected by the unique value contained in the column. + +Example: ```python - import sensospot_tools + from sensospot_tools import split + + print(data) + category value + 0 dog 1 + 1 cat 2 + 2 horse 3 + 3 cat 4 + + result = dict( split(data, column="category") ) + + print(result["dog"]) + category value + 0 dog 1 + + print(result["cat"]) + category value + 1 cat 2 + 3 cat 4 - sensospot_tools.run() + print(result["horse"]) + category value + 2 horse 3 ``` diff --git a/src/sensospot_tools/selection.py b/src/sensospot_tools/selection.py index e9d708b..ebd26a2 100644 --- a/src/sensospot_tools/selection.py +++ b/src/sensospot_tools/selection.py @@ -6,7 +6,7 @@ import pandas def select( data: pandas.DataFrame, column: str, value: Any ) -> pandas.DataFrame: - """selects a portion of a dataframe based by a value in a column + """Selects rows of a dataframe based on a value in a column Example: >>> print(data) @@ -34,7 +34,7 @@ def select( def split( data: pandas.DataFrame, column: str ) -> Iterator[tuple[Any, pandas.DataFrame]]: - """splits a data frame by unique values in a column + """Splits a data frame on unique values in a column returns an iterator where each result is key-value-pair. The key is the unique value used for the split, the value is a slice of the dataframe