snippets/linear_regression.py

import dataclasses
import pandas as pd
import pytest

from sklearn import linear_model


@dataclasses.dataclass
class Regression:
    intercept: float
    coefficient: float
    score: float

    @property
    def coeff(self) -> float:
        return self.coefficient

    @property
    def r2(self) -> float:
        return self.score

    def predict(self, x: int | float = None, y: int | float = None) -> float:
        """predict a value if x or y is given"""
        if x is not None:
            return self.intercept + x * self.coefficient
        if y is not None:
            return (y - self.intercept) / self.coefficient
        msg = "predict() expects 1 argument, got 0"
        raise TypeError(msg)

    def to_dict(self):
        return dataclasses.asdict(self)


def linear_regression(data: pd.DataFrame, *, x: str, y: str) -> Regression:
    """Fit a straight line through two columns of a DataFrame.

    ``x`` names the column used as the independent variable and ``y`` the
    column used as the dependent variable. The fitted intercept, slope and
    the model's score on the same data are returned as a ``Regression``.
    """
    # Both columns are turned into single-column 2-D arrays before fitting.
    features = data[x].values.reshape(-1, 1)
    targets = data[y].values.reshape(-1, 1)

    model = linear_model.LinearRegression()
    model.fit(features, targets)

    # The targets are 2-D, so the fitted parameters come back as arrays;
    # pick out the single intercept and the single slope.
    return Regression(
        model.intercept_[0],
        model.coef_[0][0],
        model.score(features, targets),
    )


# tests


@pytest.fixture()
def example_data() -> pd.DataFrame:
    """Five nearly collinear points: column "A" holds x, column "B" holds y."""
    columns = {
        "A": list(range(1, 6)),
        "B": [4.1, 6.9, 10.1, 12.9, 15.9],
    }
    return pd.DataFrame(columns)


def test_linear_regression(example_data):
    """The fit reports the expected slope, intercept and score, incl. aliases."""
    fit = linear_regression(example_data, x="A", y="B")
    assert isinstance(fit, Regression)

    # slope, reachable under its full name and its short alias
    slope = pytest.approx(2.96)
    assert slope == fit.coefficient
    assert slope == fit.coeff

    assert pytest.approx(1.1) == fit.intercept

    # score, reachable under its full name and its short alias
    goodness = pytest.approx(0.9996349)
    assert goodness == fit.score
    assert goodness == fit.r2


def test_regression_predict(example_data):
    """predict() works in both directions and rejects a call without input."""
    fit = linear_regression(example_data, x="A", y="B")

    # x -> y
    forward = fit.predict(10)
    assert pytest.approx(30.7) == forward

    # y -> x must lead back to the x value we started from
    backward = fit.predict(y=forward)
    assert pytest.approx(10) == backward

    with pytest.raises(TypeError, match="expects 1 argument"):
        fit.predict()
added `to_dict()` method to the `Regression` result class this makes it easier to construct data frames out of a list of regression results. 2 years ago			`import dataclasses`
added linear_regression module 2 years ago			`import pandas as pd`
			`import pytest`

			`from sklearn import linear_model`


added `to_dict()` method to the `Regression` result class this makes it easier to construct data frames out of a list of regression results. 2 years ago			`@dataclasses.dataclass`
added linear_regression module 2 years ago			`class Regression:`
			`intercept: float`
			`coefficient: float`
			`score: float`

			`@property`
			`def coeff(self) -> float:`
			`return self.coefficient`

			`@property`
			`def r2(self) -> float:`
			`return self.score`

			`def predict(self, x: int \| float = None, y: int \| float = None) -> float:`
			`"""predict a value if x or y is given"""`
			`if x is not None:`
			`return self.intercept + x * self.coefficient`
			`if y is not None:`
			`return (y - self.intercept) / self.coefficient`
			`msg = "predict() expects 1 argument, got 0"`
			`raise TypeError(msg)`

added `to_dict()` method to the `Regression` result class this makes it easier to construct data frames out of a list of regression results. 2 years ago			`def to_dict(self):`
			`return dataclasses.asdict(self)`

added linear_regression module 2 years ago
			`def linear_regression(data: pd.DataFrame, *, x: str, y: str) -> Regression:`
			`"""calculates a linear regression for two columns of a DataFrame"""`
			`x_values = data[x].values.reshape(-1, 1)`
			`y_values = data[y].values.reshape(-1, 1)`
			`fit = linear_model.LinearRegression().fit(x_values, y_values)`
			`score = fit.score(x_values, y_values)`
			`return Regression(fit.intercept_[0], fit.coef_[0][0], score)`


			`# tests`


			`@pytest.fixture()`
			`def example_data() -> pd.DataFrame:`
			`x = list(range(1, 6))`
			`y = [4.1, 6.9, 10.1, 12.9, 15.9]`
			`return pd.DataFrame({"A": x, "B": y})`


			`def test_linear_regression(example_data):`
			`result = linear_regression(example_data, x="A", y="B")`

			`assert isinstance(result, Regression)`
			`assert pytest.approx(2.96) == result.coefficient`
			`assert pytest.approx(2.96) == result.coeff`
			`assert pytest.approx(1.1) == result.intercept`
			`assert pytest.approx(0.9996349) == result.score`
			`assert pytest.approx(0.9996349) == result.r2`


			`def test_regression_predict(example_data):`
			`result = linear_regression(example_data, x="A", y="B")`

			`prediction = result.predict(10)`

			`assert pytest.approx(30.7) == prediction`
			`assert pytest.approx(10) == result.predict(y=prediction)`

			`with pytest.raises(TypeError, match="expects 1 argument"):`
			`result.predict()`