import dataclasses import pandas as pd import pytest from sklearn import linear_model @dataclasses.dataclass class Regression: intercept: float coefficient: float score: float @property def coeff(self) -> float: return self.coefficient @property def r2(self) -> float: return self.score def predict(self, *, x: int | float = None, y: int | float = None) -> float: """predict a value if x or y is given""" if x is not None and y is not None: msg = "predict() expects one keyword argument 'x' or 'y', got both" raise TypeError(msg) if x is not None: return self.intercept + x * self.coefficient if y is not None: return (y - self.intercept) / self.coefficient msg = "predict() expects a keyword argument 'x' or 'y'" raise TypeError(msg) def to_dict(self): return dataclasses.asdict(self) def linear_regression(data: pd.DataFrame, *, x: str, y: str) -> Regression: """calculates a linear regression for two columns of a DataFrame""" x_values = data[x].values.reshape(-1, 1) y_values = data[y].values.reshape(-1, 1) fit = linear_model.LinearRegression().fit(x_values, y_values) score = fit.score(x_values, y_values) return Regression(fit.intercept_[0], fit.coef_[0][0], score) # tests @pytest.fixture() def example_data() -> pd.DataFrame: x = list(range(1, 6)) y = [4.1, 6.9, 10.1, 12.9, 15.9] return pd.DataFrame({"A": x, "B": y}) def test_linear_regression(example_data): result = linear_regression(example_data, x="A", y="B") assert isinstance(result, Regression) assert pytest.approx(2.96) == result.coefficient assert pytest.approx(2.96) == result.coeff assert pytest.approx(1.1) == result.intercept assert pytest.approx(0.9996349) == result.score assert pytest.approx(0.9996349) == result.r2 def test_regression_predict(example_data): regression = linear_regression(example_data, x="A", y="B") prediction = regression.predict(x=10) assert pytest.approx(30.7) == prediction assert pytest.approx(10) == regression.predict(y=prediction) def test_regression_predict_exceptions(example_data): regression = linear_regression(example_data, x="A", y="B") with pytest.raises(TypeError, match="expects a keyword"): regression.predict() with pytest.raises(TypeError, match="expects one keyword"): regression.predict(x=1, y=2) with pytest.raises(TypeError, match="takes 1 positional argument but"): regression.predict(1) def test_regression_to_dict(example_data): regression = linear_regression(example_data, x="A", y="B") result = regression.to_dict() assert sorted(result.keys()) == ["coefficient", "intercept", "score"] assert pytest.approx(2.96) == result["coefficient"] assert pytest.approx(1.1) == result["intercept"] assert pytest.approx(0.9996349) == result["score"]