You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
85 lines
2.4 KiB
85 lines
2.4 KiB
import dataclasses |
|
import pandas as pd |
|
import pytest |
|
|
|
from sklearn import linear_model |
|
|
|
|
|
@dataclasses.dataclass |
|
class Regression: |
|
intercept: float |
|
coefficient: float |
|
score: float |
|
|
|
@property |
|
def coeff(self) -> float: |
|
return self.coefficient |
|
|
|
@property |
|
def r2(self) -> float: |
|
return self.score |
|
|
|
def predict(self, x: int | float = None, y: int | float = None) -> float: |
|
"""predict a value if x or y is given""" |
|
if x is not None: |
|
return self.intercept + x * self.coefficient |
|
if y is not None: |
|
return (y - self.intercept) / self.coefficient |
|
msg = "predict() expects 1 argument, got 0" |
|
raise TypeError(msg) |
|
|
|
def to_dict(self): |
|
return dataclasses.asdict(self) |
|
|
|
|
|
def linear_regression(data: pd.DataFrame, *, x: str, y: str) -> Regression: |
|
"""calculates a linear regression for two columns of a DataFrame""" |
|
x_values = data[x].values.reshape(-1, 1) |
|
y_values = data[y].values.reshape(-1, 1) |
|
fit = linear_model.LinearRegression().fit(x_values, y_values) |
|
score = fit.score(x_values, y_values) |
|
return Regression(fit.intercept_[0], fit.coef_[0][0], score) |
|
|
|
|
|
# tests |
|
|
|
|
|
@pytest.fixture() |
|
def example_data() -> pd.DataFrame: |
|
x = list(range(1, 6)) |
|
y = [4.1, 6.9, 10.1, 12.9, 15.9] |
|
return pd.DataFrame({"A": x, "B": y}) |
|
|
|
|
|
def test_linear_regression(example_data): |
|
result = linear_regression(example_data, x="A", y="B") |
|
|
|
assert isinstance(result, Regression) |
|
assert pytest.approx(2.96) == result.coefficient |
|
assert pytest.approx(2.96) == result.coeff |
|
assert pytest.approx(1.1) == result.intercept |
|
assert pytest.approx(0.9996349) == result.score |
|
assert pytest.approx(0.9996349) == result.r2 |
|
|
|
|
|
def test_regression_predict(example_data): |
|
regression = linear_regression(example_data, x="A", y="B") |
|
|
|
prediction = regression.predict(10) |
|
|
|
assert pytest.approx(30.7) == prediction |
|
assert pytest.approx(10) == regression.predict(y=prediction) |
|
|
|
with pytest.raises(TypeError, match="expects 1 argument"): |
|
regression.predict() |
|
|
|
|
|
def test_regression_to_dict(example_data): |
|
regression = linear_regression(example_data, x="A", y="B") |
|
|
|
result = regression.to_dict() |
|
|
|
assert sorted(result.keys()) == ["coefficient", "intercept", "score"] |
|
assert pytest.approx(2.96) == result["coefficient"] |
|
assert pytest.approx(1.1) == result["intercept"] |
|
assert pytest.approx(0.9996349) == result["score"]
|
|
|