diff --git a/tests/test_profile.py b/tests/test_profile.py
index a10bf32..c5e8a04 100644
--- a/tests/test_profile.py
+++ b/tests/test_profile.py
@@ -1,9 +1,21 @@
+from typing import Callable, Type
+
 import polars as pl
+import pytest
 
 from wimsey import profile
 from wimsey import execution
 
 
+def raise_exception_patch(exception_type: Type[Exception]) -> Callable:
+    """Creates a patch that will throw an error"""
+
+    def raise_exception(*args, **kwargs) -> None:
+        raise exception_type
+
+    return raise_exception
+
+
 def test_starter_tests_from_sampling_returns_passing_test() -> None:
     df = pl.DataFrame(
         {
@@ -77,3 +89,16 @@ def test_save_tests_from_samples_creates_expected_and_runnable_file(tmp_path) ->
     )
     result = execution.test(df, str(tmp_path / "cool.json"))
     assert result.success
+
+
+def test_validate_or_build_falls_back_to_save_starter_tests_from_sampling_if_validate_crashes(
+    monkeypatch,
+) -> None:
+    monkeypatch.setattr(profile, "validate", raise_exception_patch(FileNotFoundError))
+    monkeypatch.setattr(
+        profile,
+        "save_starter_tests_from_sampling",
+        raise_exception_patch(ZeroDivisionError),
+    )
+    with pytest.raises(ZeroDivisionError):
+        profile.validate_or_build(None, None)
diff --git a/wimsey/_version.py b/wimsey/_version.py
index 3d18726..906d362 100644
--- a/wimsey/_version.py
+++ b/wimsey/_version.py
@@ -1 +1 @@
-__version__ = "0.5.0"
+__version__ = "0.6.0"
diff --git a/wimsey/execution.py b/wimsey/execution.py
index 5b582f4..14083e4 100644
--- a/wimsey/execution.py
+++ b/wimsey/execution.py
@@ -70,7 +70,9 @@ def test(
 
 
 def validate(
-    df: FrameT, contract: str | list[dict] | dict, storage_options: dict | None = None
+    df: FrameT,
+    contract: str | list[dict] | dict,
+    storage_options: dict | None = None,
 ) -> FrameT:
     """
     Carry out tests on dataframe, returning original dataframe if tests are
diff --git a/wimsey/profile.py b/wimsey/profile.py
index 4848f1c..770e12f 100644
--- a/wimsey/profile.py
+++ b/wimsey/profile.py
@@ -7,6 +7,7 @@
 from narwhals.stable.v1.typing import FrameT
 
 from wimsey.dataframe import profile_from_sampling, profile_from_samples
+from wimsey.execution import validate
 
 
 class _StarterTestStatus(Enum):
@@ -222,3 +223,42 @@ def _type_starter_tests(
             test |= {"be_one_of": list(types)}
         tests.append(test)
     return tests
+
+
+def validate_or_build(
+    df: FrameT,
+    contract: str,
+    samples: int = 100,
+    n: int | None = None,
+    fraction: int | None = None,
+    margin: float = 1,
+    storage_options: dict | None = None,
+) -> FrameT:
+    """
+    Validate *df* against *contract*, generating the contract first if it does not exist yet.
+
+    If *validate* raises FileNotFoundError, starter tests are built by sampling *df* and saved
+    to the *contract* path with *save_starter_tests_from_sampling* (a list of samples is not
+    possible with only one dataframe). See *starter_tests_from_sampling* and
+    *save_starter_tests_from_sampling* for more details on use of keyword arguments aside from
+    df, contract and storage_options.
+
+    Returns whatever *validate* returns when the contract exists, otherwise the original *df*
+    once the starter contract has been saved. Exceptions other than FileNotFoundError
+    propagate unchanged.
+    """
+    try:
+        return validate(df, contract=contract, storage_options=storage_options)
+    except FileNotFoundError:
+        # NOTE(review): storage_options is not forwarded to the save step; confirm whether
+        # save_starter_tests_from_sampling should receive it too.
+        save_starter_tests_from_sampling(
+            path=contract,
+            df=df,
+            samples=samples,
+            n=n,
+            fraction=fraction,
+            margin=margin,
+        )
+        # The contract was just built from this dataframe, so return it unchanged.
+        return df