diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index c3b20f64e4..8d5d4a007b 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 jobs: minimum_versions: @@ -44,7 +45,10 @@ jobs: echo "$DEPS" | grep 'scikit-learn==1.1.0' echo "$DEPS" | grep 'duckdb==1.1' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],duckdb + coverage combine + coverage report --fail-under=50 pretty_old_versions: strategy: @@ -82,7 +86,10 @@ jobs: echo "$DEPS" | grep 'scikit-learn==1.1.0' echo "$DEPS" | grep 'duckdb==1.2' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],duckdb + coverage combine + coverage report --fail-under=50 not_so_old_versions: strategy: @@ -119,7 +126,10 @@ jobs: echo "$DEPS" | grep 'dask==2024.10' echo "$DEPS" | grep 'duckdb==1.3' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage combine + coverage report --fail-under=50 nightlies: strategy: @@ -175,5 +185,6 @@ jobs: echo "$DEPS" | grep 'dask.*@' - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow \ - --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage run -m 
pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage combine + coverage report --fail-under=50 diff --git a/.github/workflows/pytest-ibis.yml b/.github/workflows/pytest-ibis.yml index 2910a597f0..9823bf1985 100644 --- a/.github/workflows/pytest-ibis.yml +++ b/.github/workflows/pytest-ibis.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 jobs: @@ -36,4 +37,4 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors ibis + run: pytest tests --nw-backends ibis diff --git a/.github/workflows/pytest-modin.yml b/.github/workflows/pytest-modin.yml index ff717b2e32..3f491f4ea4 100644 --- a/.github/workflows/pytest-modin.yml +++ b/.github/workflows/pytest-modin.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 jobs: @@ -34,4 +35,4 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors modin[pyarrow] + run: pytest tests --nw-backends modin[pyarrow] diff --git a/.github/workflows/pytest-pyspark.yml b/.github/workflows/pytest-pyspark.yml index 043c630abd..8ec17c834a 100644 --- a/.github/workflows/pytest-pyspark.yml +++ b/.github/workflows/pytest-pyspark.yml @@ -14,6 +14,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 jobs: pytest-pyspark-constructor: @@ -40,7 +41,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals/_spark_like --cov-fail-under=95 --runslow --constructors pyspark + run: | + coverage run -m pytest tests --runslow --nw-backends pyspark + coverage combine + coverage report --fail-under=95 --include "narwhals/_spark_like/*" pytest-pyspark-min-version-constructor: @@ -67,7 +71,7 @@ 
jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors pyspark + run: pytest tests --nw-backends pyspark pytest-pyspark-connect-constructor: strategy: @@ -133,7 +137,10 @@ jobs: echo "Spark Connect server started" - name: Run pytest - run: pytest tests --cov=narwhals/_spark_like --cov-fail-under=95 --runslow --constructors "pyspark[connect]" + run: | + coverage run -m pytest tests --runslow --nw-backends "pyspark[connect]" + coverage combine + coverage report --fail-under=95 --include="narwhals/_spark_like/*" - name: Stop Spark Connect server if: always() diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9f6205d1cc..562416d5f7 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 jobs: pytest-39: @@ -30,7 +31,15 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 --constructors=pandas,pyarrow,polars[eager],polars[lazy] + env: + # coverage's execv/fork patches raise on Windows; collapse to `subprocess` + # there (coverage dedupes) and keep the default values on Linux. + COVERAGE_PATCH_EXECV: ${{ matrix.os == 'windows-latest' && 'subprocess' || 'execv' }} + COVERAGE_PATCH_FORK: ${{ matrix.os == 'windows-latest' && 'subprocess' || 'fork' }} + run: | + coverage run -m pytest tests --nw-backends=pandas,pyarrow,polars[eager],polars[lazy] + coverage combine + coverage report --fail-under=75 - name: install-test-plugin run: uv pip install -e test-plugin/. @@ -40,6 +49,11 @@ jobs: python-version: ["3.10", "3.12"] os: [windows-latest] runs-on: ${{ matrix.os }} + env: + # coverage's execv/fork patches raise on Windows; collapse them to `subprocess` + # in the pyproject `patch` list (coverage dedupes). 
+ COVERAGE_PATCH_EXECV: subprocess + COVERAGE_PATCH_FORK: subprocess steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 @@ -60,7 +74,9 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=95 pytest-full-coverage: strategy: @@ -91,7 +107,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=100 - name: Run doctests # reprs differ between versions, so we only run doctests on the latest Python if: matrix.python-version == '3.13' @@ -120,20 +139,20 @@ jobs: uv pip install -e ".[pandas]" --group tests uv pip freeze - name: Run pytest (pandas and pandas[nullable]) - run: pytest tests --runslow --constructors=pandas,pandas[nullable] + run: pytest tests --runslow --nw-backends=pandas,pandas[nullable] - name: install-more-reqs run: | uv pip install -U pyarrow uv pip freeze - name: Run pytest (pandas[pyarrow] and pyarrow) - run: pytest tests --runslow --constructors=pandas[pyarrow],pyarrow + run: pytest tests --runslow --nw-backends=pandas[pyarrow],pyarrow - name: install-polars run: | uv pip uninstall pandas pyarrow uv pip install polars uv pip freeze - name: Run pytest (polars) - run: pytest tests --runslow 
--constructors=polars[eager],polars[lazy] + run: pytest tests --runslow --nw-backends=polars[eager],polars[lazy] python-314: strategy: @@ -157,7 +176,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --runslow --durations=30 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --cov-fail-under=50 + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=50 python-314t: strategy: @@ -183,4 +205,7 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --runslow --durations=30 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow --cov-fail-under=50 + run: | + coverage run -m pytest tests --runslow --durations=30 --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow + coverage combine + coverage report --fail-under=50 diff --git a/.github/workflows/random_ci_pytest.yml b/.github/workflows/random_ci_pytest.yml index 2950989871..7e73be44ab 100644 --- a/.github/workflows/random_ci_pytest.yml +++ b/.github/workflows/random_ci_pytest.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml jobs: tox: @@ -36,5 +37,6 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 \ - --constructors=pandas,pyarrow,polars[eager],polars[lazy] + coverage run -m pytest tests --nw-backends=pandas,pyarrow,polars[eager],polars[lazy] + coverage combine + coverage report --fail-under=75 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98a274e4ed..3dc3129c66 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -149,7 +149,7 @@ If you add code that should be tested, please add tests. - To run tests, run `pytest`. 
To check coverage: `pytest --cov=narwhals` - To run tests on the doctests, use `pytest narwhals --doctest-modules` -- To run unit tests and doctests at the same time, run `pytest tests narwhals --cov=narwhals --doctest-modules` +- To run unit tests and doctests at the same time, run `pytest tests narwhals --doctest-modules` - To run tests multiprocessed, you may also want to use [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) (optional) - To choose which backends to run tests with you, you can use the `--constructors` flag: - To only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars` diff --git a/Makefile b/Makefile index 5c328a492f..ba743a77b6 100644 --- a/Makefile +++ b/Makefile @@ -40,3 +40,15 @@ docs-serve: # Build and serve the docs locally $(VENV_BIN)/uv run --no-sync utils/generate_backend_completeness.py $(VENV_BIN)/uv run --no-sync utils/generate_zen_content.py $(VENV_BIN)/uv run --no-sync zensical serve + +.PHONY: test +test: ## Run unittest + $(VENV_BIN)/uv pip install \ + --upgrade \ + --editable test-plugin/. \ + --editable .[ibis,modin,pyspark] \ + --group core \ + --group tests + $(VENV_BIN)/uv run --no-sync coverage run -m pytest tests --all-nw-backends --numprocesses=logical + $(VENV_BIN)/uv run --no-sync coverage combine + $(VENV_BIN)/uv run --no-sync coverage report --fail-under=95 diff --git a/docs/api-reference/testing.md b/docs/api-reference/testing.md index db83c6930e..d4bf928e59 100644 --- a/docs/api-reference/testing.md +++ b/docs/api-reference/testing.md @@ -1,8 +1,79 @@ # `narwhals.testing` +## Assertions + ::: narwhals.testing handler: python options: + show_root_heading: false + heading_level: 3 members: - assert_frame_equal - assert_series_equal + +## `pytest` plugin + +Narwhals register a pytest plugin that exposes parametrized fixtures with callables +to build native frames from a column-oriented python `dict`. 
+ +### Available fixtures + +| Fixture | Backends | +|---|---| +| `nw_frame_constructor` | every selected backend (eager + lazy) | +| `nw_eager_constructor` | only eager backends | +| `nw_pandas_like_constructor` | pandas-like backends | + +### Pytest options + +The backend selection is controlled by the following CLI options: + +* `--nw-backends=pandas,polars[lazy],duckdb`: comma-separated list. + Defaults to [`DEFAULT_BACKENDS`][narwhals.testing.constructors.DEFAULT_BACKENDS] + intersected with the backends installed in the current environment. +* `--all-nw-backends`: shortcut for "every **CPU** backend that is installed". +* `--use-nw-external-constructor`: Skip narwhals.testing's parametrisation and let + another plugin provide the `constructor*` fixtures. + +Set the `NARWHALS_DEFAULT_BACKENDS` environment variable to override the default +list (useful e.g. when running under `cudf.pandas`). + +### Quick start + +The plugin auto-loads as soon as you `pip install narwhals`. Just write a test: + +```python +from typing import TYPE_CHECKING + +import narwhals as nw + +if TYPE_CHECKING: + from narwhals.testing.typing import EagerFrameConstructor, Data + + +def test_shape(nw_eager_constructor: EagerFrameConstructor) -> None: + data: Data = {"x": [1, 2, 3]} + df = nw.from_native(nw_eager_constructor(data), eager_only=True) + assert df.shape == (3, 1) +``` + +The fixtures are parametrised against every supported backend that is installed +in the current environment. 
Filter the matrix on the command line: + +```bash +pytest --nw-backends="pandas,polars[lazy]" +pytest --all-nw-backends +``` + +## Type aliases + +::: narwhals.testing.typing + handler: python + options: + show_root_heading: false + heading_level: 3 + members: + - Data + - FrameConstructor + - EagerFrameConstructor + - LazyFrameConstructor diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index a154a1a3f8..bc0a1ca26a 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -8,7 +8,12 @@ if TYPE_CHECKING: from typing_extensions import TypeAlias - from narwhals._native import NativeDataFrame, NativeDuckDB, NativeLazyFrame + from narwhals._native import ( + NativeDataFrame, + NativeDuckDB, + NativeIbis, + NativeLazyFrame, + ) from narwhals.stable.v1 import DataFrame, Expr, LazyFrame, Series class DataFrameLike(Protocol): @@ -25,7 +30,9 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike", "NativeDuckDB"] +IntoDataFrame: TypeAlias = Union[ + "NativeDataFrame", "DataFrameLike", "NativeDuckDB", "NativeIbis" +] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. 
diff --git a/narwhals/testing/__init__.py b/narwhals/testing/__init__.py index 649463383f..6eb8c0b0d0 100644 --- a/narwhals/testing/__init__.py +++ b/narwhals/testing/__init__.py @@ -2,5 +2,6 @@ from narwhals.testing.asserts.frame import assert_frame_equal from narwhals.testing.asserts.series import assert_series_equal +from narwhals.testing.constructors import frame_constructor -__all__ = ("assert_frame_equal", "assert_series_equal") +__all__ = ("assert_frame_equal", "assert_series_equal", "frame_constructor") diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py index 64eec42abc..9386abad13 100644 --- a/narwhals/testing/asserts/frame.py +++ b/narwhals/testing/asserts/frame.py @@ -13,7 +13,6 @@ if TYPE_CHECKING: from narwhals._typing import Arrow, IntoBackend, Pandas, Polars - from narwhals.typing import DataFrameT, LazyFrameT GUARANTEES_ROW_ORDER = { Implementation.PANDAS, @@ -26,8 +25,8 @@ def assert_frame_equal( - left: DataFrameT | LazyFrameT, - right: DataFrameT | LazyFrameT, + left: DataFrame[Any] | LazyFrame[Any], + right: DataFrame[Any] | LazyFrame[Any], *, check_row_order: bool = True, check_column_order: bool = True, @@ -145,8 +144,8 @@ def assert_frame_equal( def _check_correct_input_type( # noqa: RET503 - left: DataFrameT | LazyFrameT, - right: DataFrameT | LazyFrameT, + left: DataFrame[Any] | LazyFrame[Any], + right: DataFrame[Any] | LazyFrame[Any], backend: IntoBackend[Polars | Pandas | Arrow] | None, ) -> tuple[DataFrame[Any], DataFrame[Any]]: # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/py-polars/src/polars/testing/asserts/frame.py#L15-L17 @@ -165,8 +164,8 @@ def _check_correct_input_type( # noqa: RET503 def _assert_dataframe_equal( - left: DataFrameT, - right: DataFrameT, + left: DataFrame[Any], + right: DataFrame[Any], impl: Implementation, *, check_row_order: bool, @@ -232,7 +231,11 @@ def _assert_dataframe_equal( def _check_schema_equal( - left: DataFrameT, right: 
DataFrameT, *, check_dtypes: bool, check_column_order: bool + left: DataFrame[Any], + right: DataFrame[Any], + *, + check_dtypes: bool, + check_column_order: bool, ) -> None: """Compares DataFrame schema based on specified criteria. diff --git a/narwhals/testing/constructors.py b/narwhals/testing/constructors.py new file mode 100644 index 0000000000..dab2d74fbf --- /dev/null +++ b/narwhals/testing/constructors.py @@ -0,0 +1,656 @@ +"""Constructor registry for `narwhals.testing`. + +Each constructor wraps one backend library (pandas, Polars, DuckDB, ...) and +knows how to turn a column-oriented `dict` into a native frame. + +Registration is explicit: wrap a plain builder function with `@frame_constructor.register(...)`. +The decorator instantiates a [`narwhals.testing.frame_constructor`][] with the +declared metadata and stores it in the shared `_registry`. + +## Adding a new constructor + +```py +from narwhals.testing import frame_constructor + + +@frame_constructor.register( + name="my_backend", + implementation=Implementation.MY_BACKEND, + requirements=("my_backend",), +) +def my_backend_lazy_constructor(obj: Data, /, **kwds: Any) -> IntoLazyFrame: + import my_backend + + return my_backend.from_dict(obj) +``` +""" + +from __future__ import annotations + +import os +import uuid +import warnings +from copy import deepcopy +from functools import lru_cache +from importlib.util import find_spec +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Generic, + Literal, + TypeVar, + cast, + overload, +) + +from narwhals._utils import Implementation, generate_temporary_column_name + +if TYPE_CHECKING: + from collections.abc import Iterable + + import ibis + import pandas as pd + import polars as pl + import pyarrow as pa + from ibis.backends.duckdb import Backend as IbisDuckDBBackend + from pyspark.sql import SparkSession + from sqlframe.duckdb import DuckDBSession + from typing_extensions import Concatenate, TypeAlias + + from narwhals._native import 
NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame + from narwhals.testing.typing import Data + from narwhals.typing import IntoDataFrame, IntoFrame, IntoLazyFrame + + +__all__ = ( + "available_backends", + "available_cpu_backends", + "frame_constructor", + "get_backend_constructor", + "is_backend_available", + "prepare_backends", + "pyspark_session", + "sqlframe_session", +) + +T_co = TypeVar("T_co", covariant=True, bound="IntoFrame") +R = TypeVar("R", bound="IntoFrame") + + +class frame_constructor(Generic[T_co]): # noqa: N801 + """Callable wrapper around a backend frame builder. + + Turns a column-oriented `dict` (typed as [`Data`][narwhals.testing.typing.Data]) + into a native frame. Metadata (implementation, requirements, eager/lazy, + nullability, GPU need) lives on the instance, alongside the wrapped + `func`. Equality and hashing are keyed on `(type, name)`, so two lookups + of the same registered constructor compare equal. + + Warning: + Instances should be created via [`narwhals.testing.constructors.frame_constructor.register`][], + which is the only supported entry point. + + Direct instantiation is allowed but **does not** register the instance. + """ + + _registry: ClassVar[dict[str, frame_constructor[IntoFrame]]] = {} + + def __init__( + self, + func: Callable[Concatenate[Data, ...], T_co], + /, + *, + name: str, + implementation: Implementation, + requirements: tuple[str, ...] = (), + is_eager: bool = False, + is_nullable: bool = True, + needs_gpu: bool = False, + ) -> None: + self.func = func + self.name = name + self.implementation = implementation + self.requirements = requirements + self.is_eager = is_eager + self.is_nullable = is_nullable + self.needs_gpu = needs_gpu + + @classmethod + def register( + cls, + *, + name: str, + implementation: Implementation, + requirements: tuple[str, ...] 
= (), + is_eager: bool = False, + is_nullable: bool = True, + needs_gpu: bool = False, + ) -> Callable[[Callable[Concatenate[Data, ...], R]], frame_constructor[R]]: + """Decorator: register `func` as the constructor named `name`. + + Arguments: + name: The string identifier of the constructor (e.g. `"pandas[pyarrow]"`). + implementation: The [`Implementation`][] this constructor belongs to. + requirements: Package names that must be importable for this constructor + to be available (checked via `importlib.util.find_spec`). + is_eager: Whether the backend returns an eager dataframe. + is_nullable: Whether the backend has native null support. + needs_gpu: Whether the backend requires GPU hardware. + + Returns: + A decorator that replaces `func` with a `frame_constructor` + instance registered into the shared `_registry`. + """ + + def decorator(func: Callable[Concatenate[Data, ...], R]) -> frame_constructor[R]: + inst: frame_constructor[R] = frame_constructor( + func, + name=name, + implementation=implementation, + requirements=requirements, + is_eager=is_eager, + is_nullable=is_nullable, + needs_gpu=needs_gpu, + ) + cls._registry[name] = inst + return inst + + return decorator + + def __call__(self, obj: Data, /, **kwds: Any) -> T_co: + """Build a native frame from `obj` by delegating to the wrapped function.""" + return self.func(obj, **kwds) + + @property + def identifier(self) -> str: + """Instance-level string identifier for test IDs.""" + return self.name + + @property + def is_lazy(self) -> bool: + """Whether this constructor produces a lazy native frame.""" + return not self.is_eager + + @property + def is_pandas(self) -> bool: + """Whether this is one of the pandas constructors.""" + return self.implementation.is_pandas() + + @property + def is_modin(self) -> bool: + """Whether this is one of the modin constructors.""" + return self.implementation.is_modin() + + @property + def is_cudf(self) -> bool: + """Whether this is the cudf constructor.""" + return 
self.implementation.is_cudf() + + @property + def is_pandas_like(self) -> bool: + """Whether this constructor produces a pandas-like dataframe (pandas, modin, cudf).""" + return self.implementation.is_pandas_like() + + @property + def is_polars(self) -> bool: + """Whether this is one of the polars constructors.""" + return self.implementation.is_polars() + + @property + def is_pyarrow(self) -> bool: + """Whether this is the pyarrow table constructor.""" + return self.implementation.is_pyarrow() + + @property + def is_dask(self) -> bool: + """Whether this is the dask constructor.""" + return self.implementation.is_dask() + + @property + def is_duckdb(self) -> bool: + """Whether this is the duckdb constructor.""" + return self.implementation.is_duckdb() + + @property + def is_pyspark(self) -> bool: + """Whether this is one of the pyspark constructors.""" + impl = self.implementation + return impl.is_pyspark() or impl.is_pyspark_connect() + + @property + def is_sqlframe(self) -> bool: + """Whether this is the sqlframe constructor.""" + return self.implementation.is_sqlframe() + + @property + def is_ibis(self) -> bool: + """Whether this is the ibis constructor.""" + return self.implementation.is_ibis() + + @property + def is_spark_like(self) -> bool: + """Whether this constructor uses a spark-like backend (pyspark, sqlframe).""" + return self.implementation.is_spark_like() + + @property + def needs_pyarrow(self) -> bool: + """Whether this constructor requires `pyarrow` to be installed.""" + return "pyarrow" in self.requirements + + @property + def is_available(self) -> bool: + """Whether every package this constructor needs is importable.""" + return is_backend_available(*self.requirements) + + def __str__(self) -> str: + # NOTE: This is a temporary hack + # TODO(FBruzzesi): Remove once all the `"backend" in str(constructor)` + # statements in the test suite are properly replaced + return self.func.__name__ + + def __repr__(self) -> str: + return 
f"{type(self).__name__}(name={self.name!r})" + + def __hash__(self) -> int: + return hash((type(self), self.name)) + + def __eq__(self, other: object) -> bool: + return isinstance(other, frame_constructor) and self.name == other.name + + +# Eager constructors + + +@frame_constructor.register( + name="pandas", + implementation=Implementation.PANDAS, + requirements=("pandas",), + is_eager=True, + is_nullable=False, +) +def pandas_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds) + + +@frame_constructor.register( + name="pandas[nullable]", + implementation=Implementation.PANDAS, + requirements=("pandas",), + is_eager=True, +) +def pandas_nullable_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds).convert_dtypes(dtype_backend="numpy_nullable") + + +@frame_constructor.register( + name="pandas[pyarrow]", + implementation=Implementation.PANDAS, + requirements=("pandas", "pyarrow"), + is_eager=True, +) +def pandas_pyarrow_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds).convert_dtypes(dtype_backend="pyarrow") + + +@frame_constructor.register( + name="pyarrow", + implementation=Implementation.PYARROW, + requirements=("pyarrow",), + is_eager=True, +) +def pyarrow_table_constructor(obj: Data, /, **kwds: Any) -> pa.Table: + import pyarrow as pa + + return pa.table(obj, **kwds) + + +@frame_constructor.register( + name="modin", + implementation=Implementation.MODIN, + requirements=("modin",), + is_eager=True, + is_nullable=False, +) +def modin_constructor(obj: Data, /, **kwds: Any) -> IntoDataFrame: # pragma: no cover + import modin.pandas as mpd + import pandas as pd + + return cast("IntoDataFrame", mpd.DataFrame(pd.DataFrame(obj, **kwds))) + + +@frame_constructor.register( + name="modin[pyarrow]", + implementation=Implementation.MODIN, + requirements=("modin", "pyarrow"), + 
is_eager=True, +) +def modin_pyarrow_constructor( + obj: Data, /, **kwds: Any +) -> IntoDataFrame: # pragma: no cover + import modin.pandas as mpd + import pandas as pd + + df = mpd.DataFrame(pd.DataFrame(obj, **kwds)).convert_dtypes(dtype_backend="pyarrow") + return cast("IntoDataFrame", df) + + +@frame_constructor.register( + name="cudf", + implementation=Implementation.CUDF, + requirements=("cudf",), + is_eager=True, + needs_gpu=True, +) +def cudf_constructor(obj: Data, /, **kwds: Any) -> IntoDataFrame: # pragma: no cover + import cudf + + return cast("IntoDataFrame", cudf.DataFrame(obj, **kwds)) + + +@frame_constructor.register( + name="polars[eager]", + implementation=Implementation.POLARS, + requirements=("polars",), + is_eager=True, +) +def polars_eager_constructor(obj: Data, /, **kwds: Any) -> pl.DataFrame: + import polars as pl + + return pl.DataFrame(obj, **kwds) + + +# Lazy constructors + + +@frame_constructor.register( + name="polars[lazy]", implementation=Implementation.POLARS, requirements=("polars",) +) +def polars_lazy_constructor(obj: Data, /, **kwds: Any) -> pl.LazyFrame: + import polars as pl + + return pl.LazyFrame(obj, **kwds) + + +@frame_constructor.register( + name="dask", + implementation=Implementation.DASK, + requirements=("dask",), + is_nullable=False, +) +def dask_lazy_p2_constructor( + obj: Data, /, npartitions: int = 2, **kwds: Any +) -> NativeDask: # pragma: no cover + import dask.dataframe as dd + + return cast("NativeDask", dd.from_dict(obj, npartitions=npartitions, **kwds)) + + +@frame_constructor.register( + name="duckdb", + implementation=Implementation.DUCKDB, + requirements=("duckdb", "pyarrow"), +) +def duckdb_lazy_constructor(obj: Data, /, **kwds: Any) -> NativeDuckDB: + import duckdb + import pyarrow as pa + + duckdb.sql("""set timezone = 'UTC'""") + _df = pa.table(obj, **kwds) + return duckdb.sql("select * from _df") + + +def _pyspark_build(obj: Data, /, **kwds: Any) -> NativePySpark: # pragma: no cover + session = 
_pyspark_session_lazy() + _obj = deepcopy(obj) + index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) + _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) + result = ( + session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()], **kwds) + .repartition(2) + .orderBy(index_col_name) + .drop(index_col_name) + ) + return cast("NativePySpark", result) + + +@frame_constructor.register( + name="pyspark", implementation=Implementation.PYSPARK, requirements=("pyspark",) +) +def pyspark_lazy_constructor( + obj: Data, /, **kwds: Any +) -> NativePySpark: # pragma: no cover + return _pyspark_build(obj, **kwds) + + +@frame_constructor.register( + name="pyspark[connect]", + implementation=Implementation.PYSPARK_CONNECT, + requirements=("pyspark",), +) +def pyspark_connect_lazy_constructor( + obj: Data, /, **kwds: Any +) -> NativePySpark: # pragma: no cover + return _pyspark_build(obj, **kwds) + + +@frame_constructor.register( + name="sqlframe", + implementation=Implementation.SQLFRAME, + requirements=("sqlframe", "duckdb"), +) +def sqlframe_pyspark_lazy_constructor(obj: Data, /, **kwds: Any) -> NativeSQLFrame: + session = sqlframe_session() + return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()], **kwds) + + +@frame_constructor.register( + name="ibis", + implementation=Implementation.IBIS, + requirements=("ibis", "duckdb", "pyarrow"), +) +def ibis_lazy_constructor(obj: Data, /, **kwds: Any) -> ibis.Table: # pragma: no cover + import pyarrow as pa + + table = pa.table(obj) + table_name = str(uuid.uuid4()) + return _ibis_backend().create_table(table_name, table, **kwds) + + +DEFAULT_BACKENDS: frozenset[str] = frozenset( + { + "pandas", + "pandas[pyarrow]", + "polars[eager]", + "pyarrow", + "duckdb", + "sqlframe", + "ibis", + } +) +"""Subset of backends enabled by default for parametrised tests when the +user does not pass `--nw-backends` (mirrors the historical Narwhals defaults). 
+""" + + +def available_backends() -> frozenset[str]: + """Return the names of every constructor whose backend is importable. + + Examples: + >>> from narwhals.testing.constructors import available_backends + >>> "pandas" in available_backends() + True + """ + return frozenset( + name for name, c in frame_constructor._registry.items() if c.is_available + ) + + +def available_cpu_backends() -> frozenset[str]: # pragma: no cover + """Return the names of every CPU constructor whose backend is importable. + + Examples: + >>> from narwhals.testing.constructors import available_cpu_backends + >>> "pandas" in available_cpu_backends() + True + """ + return frozenset( + name + for name, c in frame_constructor._registry.items() + if c.is_available and not c.needs_gpu + ) + + +EagerName: TypeAlias = Literal[ + "pandas", + "pandas[nullable]", + "pandas[pyarrow]", + "modin", + "modin[pyarrow]", + "cudf", + "polars[eager]", + "pyarrow", +] +LazyName: TypeAlias = Literal[ + "polars[lazy]", "dask", "duckdb", "pyspark", "pyspark[connect]", "sqlframe", "ibis" +] + + +@overload +def get_backend_constructor(name: EagerName) -> frame_constructor[IntoDataFrame]: ... +@overload +def get_backend_constructor(name: LazyName) -> frame_constructor[IntoLazyFrame]: ... +@overload +def get_backend_constructor(name: str) -> frame_constructor[IntoFrame]: ... + + +def get_backend_constructor(name: str) -> frame_constructor[IntoFrame]: + """Return the registered constructor for `name`. + + Arguments: + name: The string identifier of a registered constructor + (e.g. `"pandas[pyarrow]"`). + + Raises: + ValueError: If `name` is not a registered constructor identifier. + + Examples: + >>> from narwhals.testing.constructors import get_backend_constructor + >>> get_backend_constructor("pandas") + frame_constructor(name='pandas') + """ + try: + return frame_constructor._registry[name] + except KeyError as exc: + valid = sorted(frame_constructor._registry) + msg = f"Unknown constructor {name!r}. 
Expected one of: {valid}." + raise ValueError(msg) from exc + + +def prepare_backends( + *, include: Iterable[str] | None = None, exclude: Iterable[str] | None = None +) -> list[frame_constructor[IntoFrame]]: + """Return available constructors, optionally filtered. + + Note: + `exclude` is given precedence in the selection. + + Arguments: + include: If given, only return backends whose name is in this set. + exclude: If given, remove backends whose name is in this set. + + Examples: + >>> from narwhals.testing.constructors import prepare_backends + >>> backends = prepare_backends(include=["pandas", "polars[eager]"]) + """ + available = available_backends() + candidates: list[frame_constructor[Any]] = [ + c for name, c in frame_constructor._registry.items() if name in available + ] + + include_set: frozenset[str] = ( + frozenset(include) if include is not None else frozenset() + ) + exclude_set: frozenset[str] = ( + frozenset(exclude) if exclude is not None else frozenset() + ) + + if unknown := (include_set.union(exclude_set).difference(available)): + msg = f"The following names are not known constructors: {sorted(unknown)}" + raise ValueError(msg) + + if include is not None: + candidates = [c for c in candidates if c.name in include_set] + if exclude is not None: + candidates = [c for c in candidates if c.name not in exclude_set] + return sorted(candidates, key=lambda c: c.name) + + +def is_backend_available(*packages: str) -> bool: + """Whether every package in `packages` can be imported in this environment. 
+ + Examples: + >>> from narwhals.testing.constructors import is_backend_available + >>> is_backend_available("pandas") + True + """ + return all(find_spec(pkg) is not None for pkg in packages) + + +def sqlframe_session() -> DuckDBSession: + """Return a fresh in-memory `sqlframe` DuckDB session.""" + from sqlframe.duckdb import DuckDBSession + + # NOTE: `__new__` override inferred by `pyright` only + # https://github.com/eakmanrq/sqlframe/blob/772b3a6bfe5a1ffd569b7749d84bea2f3a314510/sqlframe/base/session.py#L181-L184 + return cast("DuckDBSession", DuckDBSession()) # type: ignore[redundant-cast] + + +def pyspark_session() -> SparkSession: # pragma: no cover + """Return a singleton local `pyspark` (or pyspark[connect]) session.""" + if is_spark_connect := os.environ.get("SPARK_CONNECT", None): + from pyspark.sql.connect.session import SparkSession + else: + from pyspark.sql import SparkSession + builder = cast("SparkSession.Builder", SparkSession.builder).appName("unit-tests") + builder = ( + builder.remote(f"sc://localhost:{os.environ.get('SPARK_PORT', '15002')}") + if is_spark_connect + else builder.master("local[1]").config("spark.ui.enabled", "false") + ) + return ( + builder.config("spark.default.parallelism", "1") + .config("spark.sql.shuffle.partitions", "2") + .config("spark.sql.session.timeZone", "UTC") + .getOrCreate() + ) + + +@lru_cache(maxsize=1) +def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover + """Cached singleton in-memory ibis backend, so all tables share one database.""" + import ibis + + return ibis.duckdb.connect() + + +@lru_cache(maxsize=1) +def _pyspark_session_lazy() -> SparkSession: # pragma: no cover + """Cached pyspark session; created on first use, stopped at interpreter exit.""" + from atexit import register + + with warnings.catch_warnings(): + # The spark session seems to trigger a polars warning. 
+ warnings.filterwarnings( + "ignore", r"Using fork\(\) can cause Polars", category=RuntimeWarning + ) + session = pyspark_session() + register(session.stop) + return session diff --git a/narwhals/testing/pytest_plugin.py b/narwhals/testing/pytest_plugin.py new file mode 100644 index 0000000000..28e9701742 --- /dev/null +++ b/narwhals/testing/pytest_plugin.py @@ -0,0 +1,138 @@ +"""Narwhals pytest plugin - auto-parametrises fixtures. + +NOTE: All imports from `narwhals.*` are deferred inside the hook functions so that +the entry-point module can be loaded by pytest without pulling in the narwhals package tree. + +This is critical because entry-point plugins are loaded *before* `coveragepy` starts +coverage measurement; any narwhals module imported at that stage would have its +module-level code (class definitions, constants, etc.) executed outside the coverage tracer. +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import pytest + + from narwhals.testing.typing import FrameConstructor + + +_MIN_PANDAS_NULLABLE_VERSION: tuple[int, ...] = (2, 0, 0) +"""`pandas.convert_dtypes(dtype_backend=...)` requires pandas >= 2.0.0.""" + + +def _pandas_version() -> tuple[int, ...]: + try: + import pandas as pd + except ImportError: # pragma: no cover + return (0, 0, 0) + + from narwhals._utils import parse_version + + return parse_version(pd.__version__) + + +def _default_backend_ids() -> list[str]: + """Resolve the default `--nw-backends` value for the current environment. + + Honours `NARWHALS_DEFAULT_BACKENDS` if set, otherwise restricts + [`DEFAULT_BACKENDS`][] to backends whose libraries are importable. 
+ """ + if env := os.environ.get("NARWHALS_DEFAULT_BACKENDS"): # pragma: no cover + return env.split(",") + from narwhals.testing.constructors import DEFAULT_BACKENDS, frame_constructor + + return [ + name + for name, constructor in frame_constructor._registry.items() + if constructor.is_available and name in DEFAULT_BACKENDS + ] + + +def pytest_addoption(parser: pytest.Parser) -> None: + from narwhals.testing.constructors import DEFAULT_BACKENDS + + group = parser.getgroup("narwhals", "narwhals-testing") + defaults = ", ".join(f"'{c}'" for c in sorted(DEFAULT_BACKENDS)) + group.addoption( + "--nw-backends", + action="store", + default=",".join(_default_backend_ids()), + type=str, + help=( + "Comma-separated list of (data|lazy) frame backend constructors to" + f"parametrise. Defaults to the installed subset of ({defaults})" + ), + ) + group.addoption( + "--all-nw-backends", + action="store_true", + default=False, + help=("Run tests against every installed CPU backend (overrides --nw-backends)."), + ) + # Escape hatch for downstream test suites that ship their own backend plugin. + # When set, this plugin still adds the CLI options but stops parametrising the fixtures. + group.addoption( + "--use-external-nw-backend", + action="store_true", + default=False, + help=( + "Skip narwhals-testing's parametrisation and let another plugin " + "provide the `nw_*frame_constructor` fixtures." 
+        ),
+    )
+
+
+def _select_backends(config: pytest.Config) -> list[FrameConstructor]:  # pragma: no cover
+    from narwhals.testing.constructors import available_cpu_backends, prepare_backends
+
+    _all_cpu_exclusions = frozenset({"modin", "pyspark[connect]"})
+
+    if config.getoption("all_nw_backends"):
+        selected = prepare_backends(
+            include=available_cpu_backends(), exclude=_all_cpu_exclusions
+        )
+    else:
+        opt = cast("str", config.getoption("nw_backends"))
+        names = [c for c in opt.split(",") if c]
+        selected = prepare_backends(include=names)
+
+    if _pandas_version() < _MIN_PANDAS_NULLABLE_VERSION:
+        _pandas_nullables = {"pandas[nullable]", "pandas[pyarrow]"}
+        selected = [c for c in selected if c.name not in _pandas_nullables]
+    return selected
+
+
+def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
+    if metafunc.config.getoption("use_external_nw_backend"):  # pragma: no cover
+        return
+
+    fixturenames = set(metafunc.fixturenames)
+    if not fixturenames & {
+        "nw_frame",
+        "nw_dataframe",
+        "nw_lazyframe",
+        "nw_pandas_like_frame",
+    }:
+        return
+
+    selected = _select_backends(metafunc.config)
+
+    if "nw_dataframe" in fixturenames:
+        params = [c for c in selected if c.is_eager]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_dataframe", params, ids=ids)
+    elif "nw_lazyframe" in fixturenames:  # pragma: no cover
+        params = [c for c in selected if not c.is_eager]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_lazyframe", params, ids=ids)
+    elif "nw_frame" in fixturenames:
+        metafunc.parametrize("nw_frame", selected, ids=[c.name for c in selected])
+    elif "nw_pandas_like_frame" in fixturenames:
+        params = [c for c in selected if c.is_eager and c.is_pandas_like]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_pandas_like_frame", params, ids=ids)
+    else:  # pragma: no cover
+        ...
diff --git a/narwhals/testing/typing.py b/narwhals/testing/typing.py
new file mode 100644
index 0000000000..f03e946887
--- /dev/null
+++ b/narwhals/testing/typing.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from typing_extensions import TypeAlias
+
+    from narwhals.testing.constructors import frame_constructor
+    from narwhals.typing import IntoDataFrame, IntoFrame, IntoLazyFrame
+
+
+__all__ = ("Data", "DataFrameConstructor", "FrameConstructor", "LazyFrameConstructor")
+
+FrameConstructor: TypeAlias = "frame_constructor[IntoFrame]"
+"""Type alias for a constructor that returns a native eager or lazy frame."""
+
+DataFrameConstructor: TypeAlias = "frame_constructor[IntoDataFrame]"
+"""Type alias for a constructor that returns an eager native dataframe."""
+
+LazyFrameConstructor: TypeAlias = "frame_constructor[IntoLazyFrame]"
+"""Type alias for a constructor that returns a lazy native frame."""
+
+Data: TypeAlias = dict[str, Any]  # TODO(Unassigned): This should have a better annotation
+"""A column-oriented mapping used as input to a frame constructor."""
diff --git a/pyproject.toml b/pyproject.toml
index a664a23ac4..2ae28d2f3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,10 @@ Documentation = "https://narwhals-dev.github.io/narwhals/"
 Repository = "https://github.com/narwhals-dev/narwhals"
 "Bug Tracker" = "https://github.com/narwhals-dev/narwhals/issues"
 
+[project.entry-points.pytest11]
+narwhals_testing = "narwhals.testing.pytest_plugin"
+# See: https://docs.pytest.org/en/stable/how-to/writing_plugins.html#making-your-plugin-installable-by-others
+
 [project.optional-dependencies]
 # These should be aligned with MIN_VERSIONS in narwhals/utils.py
 # Exception: modin, because `modin.__version__` isn't aligned with
@@ -63,7 +67,6 @@ core = [
 tests = [
     "covdefaults",
     "pytest",
-    "pytest-cov",
     "pytest-env",
     "pytest-randomly",
     "pytest-xdist",
@@ -297,7 +300,12 @@ env = [
 ]
[tool.coverage.run] +# execv and fork patches are unsupported on Windows (coverage raises), so Windows +# CI jobs set these env vars to "subprocess" — coverage dedupes the final list. +patch = ["${COVERAGE_PATCH_EXECV-execv}", "${COVERAGE_PATCH_FORK-fork}", "subprocess"] plugins = ["covdefaults"] +source = ["narwhals", "tests"] +parallel = true [tool.coverage.report] fail_under = 80 # This is just for local development, in CI we set it to 100 diff --git a/tests/conftest.py b/tests/conftest.py index 3e80bcdff4..f3f5ebf759 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,80 +1,38 @@ from __future__ import annotations -import os -import uuid -from copy import deepcopy -from functools import lru_cache from importlib.util import find_spec -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any import pytest import narwhals as nw -from narwhals._utils import Implementation, generate_temporary_column_name -from tests.utils import ID_PANDAS_LIKE, PANDAS_VERSION, pyspark_session, sqlframe_session +from narwhals._utils import Implementation + +# `narwhals.testing.pytest_plugin` registers itself via the `pytest11` entry point (see pyproject.toml) +# so it auto-loads as soon as Narwhals is installed. +# That plugin is what owns the `--constructors`, `--all-cpu-constructors`, and `--use-external-constructor` +# CLI options as well as parametrising the `constructor*` fixtures. 
if TYPE_CHECKING: from collections.abc import Sequence - import ibis - import pandas as pd - import polars as pl - import pyarrow as pa - from ibis.backends.duckdb import Backend as IbisDuckDBBackend from typing_extensions import TypeAlias - from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame from narwhals._typing import EagerAllowed - from narwhals.typing import IntoDataFrame, NonNestedDType - from tests.utils import ( - Constructor, - ConstructorEager, - ConstructorLazy, - NestedOrEnumDType, - ) + from narwhals.testing.typing import DataFrameConstructor, FrameConstructor + from narwhals.typing import NonNestedDType + from tests.utils import NestedOrEnumDType Data: TypeAlias = "dict[str, list[Any]]" -MIN_PANDAS_NULLABLE_VERSION = (2,) - -# When testing cudf.pandas in Kaggle, we get an error if we try to run -# python -m cudf.pandas -m pytest --constructors=pandas. This gives us -# a way to run `python -m cudf.pandas -m pytest` and control which constructors -# get tested. 
-if default_constructors := os.environ.get( - "NARWHALS_DEFAULT_CONSTRUCTORS", None -): # pragma: no cover - DEFAULT_CONSTRUCTORS = default_constructors -else: - DEFAULT_CONSTRUCTORS = ( - "pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe,ibis" - ) +# Narwhals-internal pytest options (not part of the public testing plugin) def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" ) - parser.addoption( - "--all-cpu-constructors", - action="store_true", - default=False, - help="run tests with all cpu constructors", - ) - parser.addoption( - "--use-external-constructor", - action="store_true", - default=False, - help="run tests with external constructor", - ) - parser.addoption( - "--constructors", - action="store", - default=DEFAULT_CONSTRUCTORS, - type=str, - help="libraries to test", - ) def pytest_configure(config: pytest.Config) -> None: @@ -85,7 +43,6 @@ def pytest_collection_modifyitems( config: pytest.Config, items: Sequence[pytest.Function] ) -> None: # pragma: no cover if config.getoption("--runslow"): - # --runslow given in cli: do not skip slow tests return skip_slow = pytest.mark.skip(reason="need --runslow option to run") for item in items: @@ -93,235 +50,6 @@ def pytest_collection_modifyitems( item.add_marker(skip_slow) -def pandas_constructor(obj: Data) -> pd.DataFrame: - import pandas as pd - - return pd.DataFrame(obj) - - -def pandas_nullable_constructor(obj: Data) -> pd.DataFrame: - import pandas as pd - - return pd.DataFrame(obj).convert_dtypes(dtype_backend="numpy_nullable") - - -def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: - pytest.importorskip("pyarrow") - import pandas as pd - - return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") - - -def modin_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import modin.pandas as mpd - import pandas as pd - - df = mpd.DataFrame(pd.DataFrame(obj)) - return 
cast("IntoDataFrame", df) - - -def modin_pyarrow_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import modin.pandas as mpd - import pandas as pd - - df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("IntoDataFrame", df) - - -def cudf_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import cudf - - df = cudf.DataFrame(obj) - return cast("IntoDataFrame", df) - - -def polars_eager_constructor(obj: Data) -> pl.DataFrame: - pytest.importorskip("polars") - import polars as pl - - return pl.DataFrame(obj) - - -def polars_lazy_constructor(obj: Data) -> pl.LazyFrame: - import polars as pl - - return pl.LazyFrame(obj) - - -def duckdb_lazy_constructor(obj: dict[str, Any]) -> NativeDuckDB: - pytest.importorskip("duckdb") - pytest.importorskip("pyarrow") - import duckdb - import pyarrow as pa - - duckdb.sql("""set timezone = 'UTC'""") - - _df = pa.table(obj) - return duckdb.sql("select * from _df") - - -def dask_lazy_p1_constructor(obj: Data) -> NativeDask: # pragma: no cover - import dask.dataframe as dd - - return cast("NativeDask", dd.from_dict(obj, npartitions=1)) - - -def dask_lazy_p2_constructor(obj: Data) -> NativeDask: # pragma: no cover - import dask.dataframe as dd - - return cast("NativeDask", dd.from_dict(obj, npartitions=2)) - - -def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: - pytest.importorskip("pyarrow") - import pyarrow as pa - - return pa.table(obj) - - -def pyspark_lazy_constructor() -> Callable[[Data], NativePySpark]: # pragma: no cover - pytest.importorskip("pyspark") - import warnings - from atexit import register - - with warnings.catch_warnings(): - # The spark session seems to trigger a polars warning. 
- # Polars is imported in the tests, but not used in the spark operations - warnings.filterwarnings( - "ignore", r"Using fork\(\) can cause Polars", category=RuntimeWarning - ) - session = pyspark_session() - - register(session.stop) - - def _constructor(obj: Data) -> NativePySpark: - _obj = deepcopy(obj) - index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) - _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) - result = ( - session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()]) - .repartition(2) - .orderBy(index_col_name) - .drop(index_col_name) - ) - return cast("NativePySpark", result) - - return _constructor - - -def sqlframe_pyspark_lazy_constructor(obj: Data) -> NativeSQLFrame: # pragma: no cover - pytest.importorskip("sqlframe") - pytest.importorskip("duckdb") - session = sqlframe_session() - return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()]) - - -@lru_cache(maxsize=1) -def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover - """Cached (singleton) in-memory backend to ensure all tables exist within the same in-memory database.""" - import ibis - - return ibis.duckdb.connect() - - -def ibis_lazy_constructor(obj: Data) -> ibis.Table: # pragma: no cover - pytest.importorskip("ibis") - pytest.importorskip("polars") - import polars as pl - - ldf = pl.LazyFrame(obj) - table_name = str(uuid.uuid4()) - return _ibis_backend().create_table(table_name, ldf) - - -EAGER_CONSTRUCTORS: dict[str, ConstructorEager] = { - "pandas": pandas_constructor, - "pandas[nullable]": pandas_nullable_constructor, - "pandas[pyarrow]": pandas_pyarrow_constructor, - "pyarrow": pyarrow_table_constructor, - "modin": modin_constructor, - "modin[pyarrow]": modin_pyarrow_constructor, - "cudf": cudf_constructor, - "polars[eager]": polars_eager_constructor, -} -LAZY_CONSTRUCTORS: dict[str, ConstructorLazy] = { - "dask": dask_lazy_p2_constructor, - "polars[lazy]": polars_lazy_constructor, - "duckdb": 
duckdb_lazy_constructor, - "pyspark": pyspark_lazy_constructor, # type: ignore[dict-item] - "sqlframe": sqlframe_pyspark_lazy_constructor, - "ibis": ibis_lazy_constructor, -} -GPU_CONSTRUCTORS: dict[str, ConstructorEager] = {"cudf": cudf_constructor} - - -def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: - if metafunc.config.getoption("use_external_constructor"): # pragma: no cover - return # let the plugin handle this - if metafunc.config.getoption("all_cpu_constructors"): # pragma: no cover - selected_constructors: list[str] = [ - *iter(EAGER_CONSTRUCTORS.keys()), - *iter(LAZY_CONSTRUCTORS.keys()), - ] - selected_constructors = [ - x - for x in selected_constructors - if x not in GPU_CONSTRUCTORS - and x - not in { - "modin", # too slow - "spark[connect]", # complex local setup; can't run together with local spark - } - ] - else: # pragma: no cover - opt = cast("str", metafunc.config.getoption("constructors")) - selected_constructors = opt.split(",") - - eager_constructors: list[ConstructorEager] = [] - eager_constructors_ids: list[str] = [] - constructors: list[Constructor] = [] - constructors_ids: list[str] = [] - - for constructor in selected_constructors: - if ( - constructor in {"pandas[nullable]", "pandas[pyarrow]"} - and MIN_PANDAS_NULLABLE_VERSION > PANDAS_VERSION - ): - continue # pragma: no cover - - if constructor in EAGER_CONSTRUCTORS: - eager_constructors.append(EAGER_CONSTRUCTORS[constructor]) - eager_constructors_ids.append(constructor) - constructors.append(EAGER_CONSTRUCTORS[constructor]) - elif constructor in {"pyspark", "pyspark[connect]"}: # pragma: no cover - constructors.append(pyspark_lazy_constructor()) - elif constructor in LAZY_CONSTRUCTORS: - constructors.append(LAZY_CONSTRUCTORS[constructor]) - else: # pragma: no cover - msg = f"Expected one of {EAGER_CONSTRUCTORS.keys()} or {LAZY_CONSTRUCTORS.keys()}, got {constructor}" - raise ValueError(msg) - constructors_ids.append(constructor) - - if "constructor_eager" in 
metafunc.fixturenames: - metafunc.parametrize( - "constructor_eager", eager_constructors, ids=eager_constructors_ids - ) - elif "constructor" in metafunc.fixturenames: - metafunc.parametrize("constructor", constructors, ids=constructors_ids) - elif "constructor_pandas_like" in metafunc.fixturenames: - pandas_like_constructors = [] - pandas_like_constructors_ids = [] - for fn, name in zip(eager_constructors, eager_constructors_ids): - if name in ID_PANDAS_LIKE: - pandas_like_constructors.append(fn) - pandas_like_constructors_ids.append(name) - metafunc.parametrize( - "constructor_pandas_like", - pandas_like_constructors, - ids=pandas_like_constructors_ids, - ) - - TEST_EAGER_BACKENDS: list[EagerAllowed] = [] TEST_EAGER_BACKENDS.extend( (Implementation.POLARS, "polars") if find_spec("polars") is not None else () @@ -390,3 +118,24 @@ def non_nested_type(request: pytest.FixtureRequest) -> type[NonNestedDType]: def nested_dtype(request: pytest.FixtureRequest) -> NestedOrEnumDType: dtype: NestedOrEnumDType = request.param return dtype + + +# The following fixtures are aliases of those registered in `narwhals/testing/pytest_plugin.py` +# in order to be backward compatible with the old fixture names and avoid having to change +# every single test. 
+# TODO(FBruzzesi): Rm once all tests start using nw_frame_constructor directly +@pytest.fixture +def constructor(nw_frame: FrameConstructor) -> FrameConstructor: + return nw_frame + + +@pytest.fixture +def constructor_eager(nw_dataframe: DataFrameConstructor) -> FrameConstructor: + return nw_dataframe + + +@pytest.fixture +def constructor_pandas_like( + nw_pandas_like_frame: DataFrameConstructor, +) -> FrameConstructor: + return nw_pandas_like_frame diff --git a/tests/dtypes/dtypes_test.py b/tests/dtypes/dtypes_test.py index 33fa61ac08..a233f955f3 100644 --- a/tests/dtypes/dtypes_test.py +++ b/tests/dtypes/dtypes_test.py @@ -9,13 +9,8 @@ import narwhals as nw from narwhals.exceptions import InvalidOperationError, PerformanceWarning -from tests.utils import ( - PANDAS_VERSION, - POLARS_VERSION, - PYARROW_VERSION, - assert_equal_hash, - pyspark_session, -) +from narwhals.testing.constructors import pyspark_session +from tests.utils import PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, assert_equal_hash if TYPE_CHECKING: from collections.abc import Iterable diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index af0c464e5b..ec6c2ff8bc 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -98,16 +98,16 @@ def test_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = nw.from_native(constructor_eager(data), eager_only=True) + df = nw.from_native(nw_dataframe(data), eager_only=True) result = df.select(getattr(df["a"], attr)(rhs)) assert_equal_data(result, {"a": expected}) @@ -128,29 +128,29 @@ def 
test_right_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__rmod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = nw.from_native(constructor_eager(data), eager_only=True) + df = nw.from_native(nw_dataframe(data), eager_only=True) result_series = getattr(df["a"], attr)(rhs) assert result_series.name == "a" assert_equal_data({"a": result_series}, {"a": expected}) def test_truediv_same_dims( - constructor_eager: ConstructorEager, request: pytest.FixtureRequest + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest ) -> None: - if "polars" in str(constructor_eager): + if "polars" in str(nw_dataframe): # https://github.com/pola-rs/polars/issues/17760 request.applymarker(pytest.mark.xfail) - s_left = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] - s_right = nw.from_native(constructor_eager({"a": [2, 2, 1]}), eager_only=True)["a"] + s_left = nw.from_native(nw_dataframe({"a": [1, 2, 3]}), eager_only=True)["a"] + s_right = nw.from_native(nw_dataframe({"a": [2, 2, 1]}), eager_only=True)["a"] result = s_left / s_right assert_equal_data({"a": result}, {"a": [0.5, 1.0, 3.0]}) result = s_left.__rtruediv__(s_right) @@ -160,13 +160,13 @@ def test_truediv_same_dims( @given(left=st.integers(-100, 100), right=st.integers(-100, 100)) @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") @pytest.mark.slow -def test_floordiv(constructor_eager: ConstructorEager, *, left: int, right: int) -> None: - if any(x in str(constructor_eager) for x in ["modin", "cudf"]): +def test_floordiv(nw_dataframe: ConstructorEager, *, left: int, right: int) -> None: + if any(x in str(nw_dataframe) for x in ["modin", 
"cudf"]): # modin & cudf are too slow here pytest.skip() assume(right != 0) expected = {"a": [left // right]} - result = nw.from_native(constructor_eager({"a": [left]}), eager_only=True).select( + result = nw.from_native(nw_dataframe({"a": [left]}), eager_only=True).select( nw.col("a") // right ) assert_equal_data(result, expected) @@ -175,14 +175,14 @@ def test_floordiv(constructor_eager: ConstructorEager, *, left: int, right: int) @pytest.mark.slow @given(left=st.integers(-100, 100), right=st.integers(-100, 100)) @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") -def test_mod(constructor_eager: ConstructorEager, *, left: int, right: int) -> None: - if any(x in str(constructor_eager) for x in ["pandas_pyarrow", "modin", "cudf"]): +def test_mod(nw_dataframe: ConstructorEager, *, left: int, right: int) -> None: + if any(x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin", "cudf"]): # pandas[pyarrow] does not implement mod # modin & cudf are too slow here pytest.skip() assume(right != 0) expected = {"a": [left % right]} - result = nw.from_native(constructor_eager({"a": [left]}), eager_only=True).select( + result = nw.from_native(nw_dataframe({"a": [left]}), eager_only=True).select( nw.col("a") % right ) assert_equal_data(result, expected) @@ -240,16 +240,16 @@ def test_arithmetic_series_left_literal( attr: str, lhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 4.0]} - df = nw.from_native(constructor_eager(data)) + df = nw.from_native(nw_dataframe(data)) result = df.select(getattr(lhs, attr)(nw.col("a"))) assert_equal_data(result, {"literal": expected}) diff --git 
a/tests/expr_and_series/corr_test.py b/tests/expr_and_series/corr_test.py index 9152df69c8..15e371d62d 100644 --- a/tests/expr_and_series/corr_test.py +++ b/tests/expr_and_series/corr_test.py @@ -25,7 +25,7 @@ def test_corr_expr( output_name: str, a: str | nw.Expr, b: str | nw.Expr, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor) and expected_corr is None: request.applymarker( @@ -51,7 +51,7 @@ def test_corr_expr_spearman( output_name: str, a: str | nw.Expr, b: str | nw.Expr, - expected_corr: float, + expected_corr: float | None, ) -> None: context = ( does_not_raise() @@ -75,7 +75,7 @@ def test_corr_series( output_name: str, a: str, b: str, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor_eager) and expected_corr is None: request.applymarker( @@ -97,7 +97,7 @@ def test_corr_series_spearman( output_name: str, a: str, b: str, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor_eager) and expected_corr is None: request.applymarker( diff --git a/tests/expr_and_series/dt/convert_time_zone_test.py b/tests/expr_and_series/dt/convert_time_zone_test.py index 65d1a6e3b6..8fd654ad6d 100644 --- a/tests/expr_and_series/dt/convert_time_zone_test.py +++ b/tests/expr_and_series/dt/convert_time_zone_test.py @@ -7,13 +7,13 @@ import pytest import narwhals as nw +from narwhals.testing.constructors import pyspark_session from tests.utils import ( PANDAS_VERSION, POLARS_VERSION, Constructor, assert_equal_data, is_windows, - pyspark_session, ) if TYPE_CHECKING: diff --git a/tests/expr_and_series/dt/replace_time_zone_test.py b/tests/expr_and_series/dt/replace_time_zone_test.py index 1c9dff7d59..27bc394b69 100644 --- a/tests/expr_and_series/dt/replace_time_zone_test.py +++ b/tests/expr_and_series/dt/replace_time_zone_test.py @@ -7,13 +7,8 @@ import pytest import narwhals as nw -from tests.utils import ( - PANDAS_VERSION, - Constructor, - 
assert_equal_data, - is_windows, - pyspark_session, -) +from narwhals.testing.constructors import pyspark_session +from tests.utils import PANDAS_VERSION, Constructor, assert_equal_data, is_windows if TYPE_CHECKING: from tests.utils import ConstructorEager diff --git a/tests/expr_and_series/fill_nan_test.py b/tests/expr_and_series/fill_nan_test.py index 132b553c50..1835d6c1f1 100644 --- a/tests/expr_and_series/fill_nan_test.py +++ b/tests/expr_and_series/fill_nan_test.py @@ -3,21 +3,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - def test_fill_nan(request: pytest.FixtureRequest, constructor: Constructor) -> None: if "cudf" in str(constructor): @@ -36,7 +23,7 @@ def test_fill_nan(request: pytest.FixtureRequest, constructor: Constructor) -> N assert_equal_data(result, expected) assert result.lazy().collect()["float_na"].null_count() == 2 result = df.select(nw.all().fill_nan(3.0)) - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # no nan vs null distinction expected = {"float": [-1.0, 1.0, 3.0], "float_na": [3.0, 1.0, 3.0]} assert result.lazy().collect()["float_na"].null_count() == 0 @@ -55,7 +42,7 @@ def test_fill_nan_series(constructor_eager: ConstructorEager) -> None: "float_na" ] result = s.fill_nan(999) - if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor_eager.is_nullable: # no nan vs null distinction assert_equal_data({"a": result}, {"a": [999.0, 1.0, 999.0]}) elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index 
16c59536ca..01579e3f3f 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -12,12 +12,6 @@ import narwhals as nw from narwhals.exceptions import ComputeError, InvalidOperationError -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import ( PANDAS_VERSION, PYARROW_VERSION, @@ -29,12 +23,6 @@ if TYPE_CHECKING: from narwhals.typing import NumericLiteral -NON_NULLABLE_CONSTRUCTORS = ( - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -) NULL_PLACEHOLDER, NAN_PLACEHOLDER = 9999.0, -1.0 INF_POS, INF_NEG = float("inf"), float("-inf") @@ -126,7 +114,7 @@ def test_is_close_series_with_series( y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = x.is_close(y, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) - if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + if not constructor_eager.is_nullable: expected = [v if v is not None else nans_equal for v in expected] elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): expected = [ @@ -154,7 +142,7 @@ def test_is_close_series_with_scalar( y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) - if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + if not constructor_eager.is_nullable: expected = [v if v is not None else False for v in expected] elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): expected = [ @@ -199,7 +187,7 @@ def test_is_close_expr_with_expr( ) .sort("idx") ) - if constructor in NON_NULLABLE_CONSTRUCTORS: + if not constructor.is_nullable: expected = [v if v is not None else nans_equal for v in expected] elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): expected = [ @@ -240,7 +228,7 @@ def 
test_is_close_expr_with_scalar( ) .sort("idx") ) - if constructor in NON_NULLABLE_CONSTRUCTORS: + if not constructor.is_nullable: expected = [v if v is not None else False for v in expected] elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): expected = [ diff --git a/tests/expr_and_series/is_finite_test.py b/tests/expr_and_series/is_finite_test.py index eb07b2a41e..f55b106593 100644 --- a/tests/expr_and_series/is_finite_test.py +++ b/tests/expr_and_series/is_finite_test.py @@ -5,21 +5,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import POLARS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - data = {"a": [float("nan"), float("inf"), 2.0, None]} @@ -77,7 +64,7 @@ def test_is_finite_column_with_null(constructor: Constructor, data: list[float]) result = df.select(nw.col("a").is_finite()) expected: dict[str, list[Any]] - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = {"a": [True, True, False]} else: diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py index 27790e27b2..9dce78c535 100644 --- a/tests/expr_and_series/is_nan_test.py +++ b/tests/expr_and_series/is_nan_test.py @@ -5,21 +5,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - def test_nan(constructor: Constructor) -> None: 
data_na = {"int": [-1, 1, None]} @@ -33,7 +20,7 @@ def test_nan(constructor: Constructor) -> None: ) expected: dict[str, list[Any]] - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = { "int": [False, False, True], @@ -70,7 +57,7 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None: "float_na": df["float_na"].is_nan(), } expected: dict[str, list[Any]] - if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor_eager.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = { "int": [False, False, True], diff --git a/tests/expr_and_series/list/get_test.py b/tests/expr_and_series/list/get_test.py index 52ca3386ba..338ab6197a 100644 --- a/tests/expr_and_series/list/get_test.py +++ b/tests/expr_and_series/list/get_test.py @@ -45,9 +45,8 @@ def test_get_series( pytest.skip() pytest.importorskip("pyarrow") - if ( - constructor_eager.__name__.startswith("pandas") - and "pyarrow" not in constructor_eager.__name__ + if str(constructor_eager).startswith("pandas") and "pyarrow" not in str( + constructor_eager ): df = nw.from_native(constructor_eager(data), eager_only=True) msg = re.escape("Series must be of PyArrow List type to support list namespace.") diff --git a/tests/expr_and_series/nth_test.py b/tests/expr_and_series/nth_test.py index 1249f7f2e2..86f9bfe2eb 100644 --- a/tests/expr_and_series/nth_test.py +++ b/tests/expr_and_series/nth_test.py @@ -1,17 +1,14 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals as nw from tests.utils import POLARS_VERSION, Constructor, assert_equal_data -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} +data: dict[str, list[Any]] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 
9.0]} @pytest.mark.parametrize( diff --git a/tests/expr_and_series/str/split_test.py b/tests/expr_and_series/str/split_test.py index b6b25cd024..f206b84e35 100644 --- a/tests/expr_and_series/str/split_test.py +++ b/tests/expr_and_series/str/split_test.py @@ -20,8 +20,7 @@ ) def test_str_split(constructor: Constructor, by: str, expected: Any) -> None: if "cudf" not in str(constructor) and ( - constructor.__name__.startswith("pandas") - and "pyarrow" not in constructor.__name__ + str(constructor).startswith("pandas") and "pyarrow" not in str(constructor) ): df = nw.from_native(constructor(data)) msg = re.escape("This operation requires a pyarrow-backed series. ") @@ -44,8 +43,8 @@ def test_str_split_series( constructor_eager: ConstructorEager, by: str, expected: Any ) -> None: if "cudf" not in str(constructor_eager) and ( - constructor_eager.__name__.startswith("pandas") - and "pyarrow" not in constructor_eager.__name__ + str(constructor_eager).startswith("pandas") + and "pyarrow" not in str(constructor_eager) ): df = nw.from_native(constructor_eager(data), eager_only=True) msg = re.escape("This operation requires a pyarrow-backed series. 
") diff --git a/tests/frame/group_by_test.py b/tests/frame/group_by_test.py index 57aacae09b..788a5363a4 100644 --- a/tests/frame/group_by_test.py +++ b/tests/frame/group_by_test.py @@ -26,7 +26,7 @@ from narwhals.typing import NonNestedLiteral -data: Mapping[str, Any] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} +data: dict[str, list[Any]] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} POLARS_COLLECT_STREAMING_ENGINE = os.environ.get("NARWHALS_POLARS_NEW_STREAMING", None) diff --git a/tests/frame/interchange_native_namespace_test.py b/tests/frame/interchange_native_namespace_test.py index 79a92ef6c9..0face73928 100644 --- a/tests/frame/interchange_native_namespace_test.py +++ b/tests/frame/interchange_native_namespace_test.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest @@ -9,10 +9,7 @@ pytest.importorskip("polars") import polars as pl -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} def test_interchange() -> None: @@ -60,9 +57,9 @@ def test_duckdb() -> None: pytest.importorskip("duckdb") import duckdb - df_pl = pl.DataFrame(data) # noqa: F841 + _df_pl = pl.DataFrame(data) - rel = duckdb.sql("select * from df_pl") + rel = duckdb.sql("select * from _df_pl") df = nw_v1.from_native(rel, eager_or_interchange_only=True) series = df["a"] diff --git a/tests/frame/interchange_select_test.py b/tests/frame/interchange_select_test.py index a927ba18c6..90279f0296 100644 --- a/tests/frame/interchange_select_test.py +++ b/tests/frame/interchange_select_test.py @@ -1,16 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals as nw import narwhals.stable.v1 as nw_v1 -if TYPE_CHECKING: - from collections.abc 
import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} class InterchangeDataFrame: diff --git a/tests/frame/interchange_to_arrow_test.py b/tests/frame/interchange_to_arrow_test.py index 2277d498ea..e8604f816d 100644 --- a/tests/frame/interchange_to_arrow_test.py +++ b/tests/frame/interchange_to_arrow_test.py @@ -1,15 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals.stable.v1 as nw_v1 -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} pytest.importorskip("polars") pytest.importorskip("pyarrow") diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 42d52adafc..7ca4754b2d 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -512,16 +512,27 @@ def test_joinasof_numeric( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( - constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = from_native_lazy( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") - result = df.join_asof( - df_right, left_on="antananarivo", right_on="antananarivo", strategy=strategy - ) - result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) + + data_left = {"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]} + data_right = {"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]} + left_lf = from_native_lazy(constructor(data_left)).sort("antananarivo") + right_lf = from_native_lazy(constructor(data_right)).sort("antananarivo") + + result: 
nw.DataFrame[Any] | nw.LazyFrame[Any] + result_on: nw.DataFrame[Any] | nw.LazyFrame[Any] + if constructor.is_lazy: + result = left_lf.join_asof( + right_lf, left_on="antananarivo", right_on="antananarivo", strategy=strategy + ) + result_on = left_lf.join_asof(right_lf, on="antananarivo", strategy=strategy) + + else: + left_df, right_df = left_lf.collect(), right_lf.collect() + result = left_df.join_asof( + right_df, left_on="antananarivo", right_on="antananarivo", strategy=strategy + ) + result_on = left_df.join_asof(right_df, on="antananarivo", strategy=strategy) + assert_equal_data(result.sort(by="antananarivo"), expected) assert_equal_data(result_on.sort(by="antananarivo"), expected) @@ -754,13 +765,16 @@ def test_joinasof_by_exceptions( message: str, ) -> None: data = {ON: [1, 3, 2], BY: [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) - if isinstance(df, nw.LazyFrame): + frame = from_native_lazy(constructor(data)) + + if constructor.is_lazy: with pytest.raises(ValueError, match=message): - df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + frame.join_asof(frame, on=on, by_left=by_left, by_right=by_right, by=by) else: with pytest.raises(ValueError, match=message): - df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + frame.collect().join_asof( + frame.collect(), on=on, by_left=by_left, by_right=by_right, by=by + ) def test_join_duplicate_column_names( @@ -777,7 +791,7 @@ def test_join_duplicate_column_names( ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]} - df = nw.from_native(constructor(data)) + lf = from_native_lazy(constructor(data)) if any( x in str(constructor) for x in ("pandas", "pandas[pyarrow]", "pandas[nullable]", "dask") @@ -796,10 +810,12 @@ def test_join_duplicate_column_names( request.applymarker(pytest.mark.xfail) else: exception = nw.exceptions.DuplicateError - if isinstance(df, nw.LazyFrame): + + if constructor.is_lazy: with 
pytest.raises(exception): - df.join(df, on=["a"]).join(df, on=["a"]).collect() + lf.join(lf, on=["a"]).join(lf, on=["a"]).collect() else: + df = lf.collect() with pytest.raises(exception): df.join(df, on=["a"]).join(df, on=["a"]) diff --git a/tests/frame/lazy_test.py b/tests/frame/lazy_test.py index 9e671c68d2..658a61c68b 100644 --- a/tests/frame/lazy_test.py +++ b/tests/frame/lazy_test.py @@ -9,13 +9,8 @@ import narwhals as nw from narwhals._utils import Implementation from narwhals.dependencies import get_cudf, get_modin -from tests.utils import ( - DUCKDB_VERSION, - PANDAS_VERSION, - assert_equal_data, - pyspark_session, - sqlframe_session, -) +from narwhals.testing.constructors import pyspark_session, sqlframe_session +from tests.utils import DUCKDB_VERSION, PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from narwhals._typing import LazyAllowed, SparkLike diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index d90916b029..93779cd129 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -588,8 +588,8 @@ def origin_pandas_like_pyarrow( if PANDAS_VERSION < (1, 5): pytest.skip(reason="pandas too old for `pyarrow`") name_pandas_like = {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"} - if constructor_pandas_like.__name__ not in name_pandas_like: - pytest.skip(f"{constructor_pandas_like.__name__!r} is not pandas_like_pyarrow") + if str(constructor_pandas_like) not in name_pandas_like: + pytest.skip(f"{constructor_pandas_like!s} is not pandas_like_pyarrow") data = { "a": [2, 1], "b": ["hello", "hi"], diff --git a/tests/frame/to_pandas_test.py b/tests/frame/to_pandas_test.py index 473b685c19..b74c9a98b1 100644 --- a/tests/frame/to_pandas_test.py +++ b/tests/frame/to_pandas_test.py @@ -22,7 +22,7 @@ def test_convert_pandas(constructor_eager: ConstructorEager) -> None: df_raw = constructor_eager(data) result = nw.from_native(df_raw, eager_only=True).to_pandas() - if constructor_eager.__name__.startswith("pandas"): + 
if str(constructor_eager).startswith("pandas"): expected = cast("pd.DataFrame", constructor_eager(data)) elif "modin_pyarrow" in str(constructor_eager): expected = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") diff --git a/tests/frame/to_polars_test.py b/tests/frame/to_polars_test.py index 60ca653f32..89d4a65b2a 100644 --- a/tests/frame/to_polars_test.py +++ b/tests/frame/to_polars_test.py @@ -1,14 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import pytest import narwhals as nw if TYPE_CHECKING: - from collections.abc import Mapping - + from narwhals.testing.typing import Data from tests.utils import ConstructorEager pytest.importorskip("polars") @@ -20,7 +19,7 @@ def test_convert_polars(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") from polars.testing import assert_frame_equal - data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} + data: Data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} df_raw = constructor_eager(data) result = nw.from_native(df_raw).to_polars() diff --git a/tests/hypothesis/getitem_test.py b/tests/hypothesis/getitem_test.py index 759a292f97..d9fc517f60 100644 --- a/tests/hypothesis/getitem_test.py +++ b/tests/hypothesis/getitem_test.py @@ -1,29 +1,30 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, cast import hypothesis.strategies as st import pytest from hypothesis import assume, given import narwhals as nw -from tests.conftest import pandas_constructor, pyarrow_table_constructor +from narwhals.testing.constructors import get_backend_constructor from tests.utils import assert_equal_data if TYPE_CHECKING: from collections.abc import Sequence - from narwhals.typing import IntoDataFrame + from narwhals.testing.typing import DataFrameConstructor pytest.importorskip("pandas") pytest.importorskip("polars") import 
polars as pl -@pytest.fixture(params=[pandas_constructor, pyarrow_table_constructor], scope="module") -def pandas_or_pyarrow_constructor( - request: pytest.FixtureRequest, -) -> Callable[[Any], IntoDataFrame]: +@pytest.fixture( + params=[get_backend_constructor("pandas"), get_backend_constructor("pyarrow")], + scope="module", +) +def pandas_or_pyarrow_constructor(request: pytest.FixtureRequest) -> DataFrameConstructor: return request.param # type: ignore[no-any-return] @@ -117,7 +118,9 @@ def tuple_selector(draw: st.DrawFn) -> tuple[Any, Any]: @given(selector=st.one_of(single_selector, tuple_selector())) @pytest.mark.slow -def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: +def test_getitem( + pandas_or_pyarrow_constructor: DataFrameConstructor, selector: Any +) -> None: """Compare __getitem__ against polars.""" # TODO(PR - clean up): documenting current differences # These assume(...) lines each filter out a known difference. @@ -125,7 +128,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # NotImplementedError: Slicing with step is not supported on PyArrow tables assume( not ( - pandas_or_pyarrow_constructor is pyarrow_table_constructor + pandas_or_pyarrow_constructor.is_pyarrow and isinstance(selector, slice) and selector.step is not None ) @@ -134,7 +137,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # NotImplementedError: Slicing with step is not supported on PyArrow tables assume( not ( - pandas_or_pyarrow_constructor is pyarrow_table_constructor + pandas_or_pyarrow_constructor.is_pyarrow and isinstance(selector, tuple) and ( (isinstance(selector[0], slice) and selector[0].step is not None) diff --git a/tests/hypothesis/join_test.py b/tests/hypothesis/join_test.py index 037854a861..fe40218c9a 100644 --- a/tests/hypothesis/join_test.py +++ b/tests/hypothesis/join_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import math from typing import TYPE_CHECKING, 
Any, cast import pytest @@ -44,7 +45,10 @@ def test_join( # pragma: no cover floats: st.SearchStrategy[list[float]], cols: st.SearchStrategy[list[str]], ) -> None: - data: Mapping[str, Any] = {"a": integers, "b": other_integers, "c": floats} + # See https://github.com/narwhals-dev/narwhals/issues/3554 + # for why we need to assume that all float values are finite + assume(all(math.isfinite(f) for f in cast("list[float]", floats))) + data: dict[str, Any] = {"a": integers, "b": other_integers, "c": floats} join_cols = cast("list[str]", cols) df_polars = pl.DataFrame(data) diff --git a/tests/ibis_test.py b/tests/ibis_test.py index 14a93c8ef8..10dce38413 100644 --- a/tests/ibis_test.py +++ b/tests/ibis_test.py @@ -1,30 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any - import pytest import narwhals as nw - -if TYPE_CHECKING: - import ibis - import polars as pl - - from tests.utils import Constructor -else: - ibis = pytest.importorskip("ibis") - pl = pytest.importorskip("polars") - - -@pytest.fixture -def ibis_constructor() -> Constructor: - def func(data: dict[str, Any]) -> ibis.Table: - df = pl.DataFrame(data) - return ibis.memtable(df) - - return func +from narwhals.testing.constructors import get_backend_constructor -def test_from_native(ibis_constructor: Constructor) -> None: +def test_from_native() -> None: + ibis_constructor = get_backend_constructor("ibis") + if not ibis_constructor.is_available: + pytest.skip() df = nw.from_native(ibis_constructor({"a": [1, 2, 3], "b": [4, 5, 6]})) assert df.columns == ["a", "b"] diff --git a/tests/modern_polars/method_chaining_test.py b/tests/modern_polars/method_chaining_test.py index 611f85973e..ba7a06b894 100644 --- a/tests/modern_polars/method_chaining_test.py +++ b/tests/modern_polars/method_chaining_test.py @@ -38,10 +38,7 @@ def test_split_list_get(request: pytest.FixtureRequest, constructor: Constructor if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - if ( - 
constructor.__name__.startswith("pandas") - and "pyarrow" not in constructor.__name__ - ): + if str(constructor).startswith("pandas") and "pyarrow" not in str(constructor): df = nw.from_native(constructor(data)) msg = re.escape("This operation requires a pyarrow-backed series. ") with pytest.raises(TypeError, match=msg): diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 4548f76a87..0e0c94d994 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -6,13 +6,8 @@ import pytest import narwhals as nw -from tests.utils import ( - PANDAS_VERSION, - Constructor, - assert_equal_data, - pyspark_session, - sqlframe_session, -) +from narwhals.testing.constructors import pyspark_session, sqlframe_session +from tests.utils import PANDAS_VERSION, Constructor, assert_equal_data pytest.importorskip("polars") pytest.importorskip("pyarrow") @@ -32,7 +27,7 @@ IOSourceKind: TypeAlias = Literal["str", "Path", "PathLike"] -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} skipif_pandas_lt_1_5 = pytest.mark.skipif( PANDAS_VERSION < (1, 5), reason="too old for pyarrow" ) diff --git a/tests/series_only/hist_test.py b/tests/series_only/hist_test.py index 183c0a13ff..9bf4f26c62 100644 --- a/tests/series_only/hist_test.py +++ b/tests/series_only/hist_test.py @@ -11,11 +11,14 @@ import narwhals as nw from narwhals.exceptions import ComputeError +from narwhals.testing.constructors import get_backend_constructor from tests.utils import POLARS_VERSION, ConstructorEager, assert_equal_data if TYPE_CHECKING: from collections.abc import Sequence + from narwhals.testing.constructors import EagerName + rnd = Random(0) # noqa: S311 data: dict[str, Any] = { @@ -43,7 +46,16 @@ param_include_breakpoint = pytest.mark.parametrize( "include_breakpoint", [True, False], ids=["breakpoint-True", "breakpoint-False"] ) -param_library = 
pytest.mark.parametrize("library", ["pandas", "polars", "pyarrow"]) +param_name = pytest.mark.parametrize("name", ["pandas", "polars[eager]", "pyarrow"]) + + +def maybe_name_to_constructor(name: EagerName) -> ConstructorEager: + constructor = get_backend_constructor(name) + if constructor.is_available: + return constructor + + pytest.skip() + SHIFT_BINS_BY = 10 """shift bins property""" @@ -63,31 +75,15 @@ ], ids=str, ) -@param_library +@param_name def test_hist_bin( - library: str, + name: EagerName, bins: list[float], expected: Sequence[float], *, include_breakpoint: bool, ) -> None: - constructor_eager: ConstructorEager - pytest.importorskip(library) - if library == "pandas": - import pandas as pd - - constructor_eager = pd.DataFrame - elif library == "polars": - import polars as pl - - constructor_eager = pl.DataFrame - else: - import pyarrow as pa - - pytest.importorskip("numpy") - - constructor_eager = pa.table - + constructor_eager = maybe_name_to_constructor(name) df = nw.from_native(constructor_eager(data)).with_columns( float=nw.col("int").cast(nw.Float64) ) @@ -130,22 +126,11 @@ def test_hist_bin( @pytest.mark.parametrize("params", counts_and_expected) @param_include_breakpoint -@param_library +@param_name def test_hist_count( - library: str, *, params: dict[str, Any], include_breakpoint: bool + name: EagerName, *, params: dict[str, Any], include_breakpoint: bool ) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table + constructor_eager = maybe_name_to_constructor(name) df = nw.from_native(constructor_eager(data)).with_columns( float=nw.col("int").cast(nw.Float64) ) @@ -186,20 +171,9 @@ def test_hist_count( ) -@param_library -def test_hist_count_no_spread(library: str) -> 
None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table +@param_name +def test_hist_count_no_spread(name: EagerName) -> None: + constructor_eager = maybe_name_to_constructor(name) data = {"all_zero": [0, 0, 0], "all_non_zero": [5, 5, 5]} df = nw.from_native(constructor_eager(data)) @@ -229,20 +203,9 @@ def test_hist_bin_and_bin_count() -> None: @param_include_breakpoint -@param_library -def test_hist_no_data(library: str, *, include_breakpoint: bool) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table +@param_name +def test_hist_no_data(name: EagerName, *, include_breakpoint: bool) -> None: + constructor_eager = maybe_name_to_constructor(name) s = nw.from_native(constructor_eager({"values": []})).select( nw.col("values").cast(nw.Float64) )["values"] @@ -262,20 +225,9 @@ def test_hist_no_data(library: str, *, include_breakpoint: bool) -> None: assert result["count"].sum() == 0 -@param_library -def test_hist_small_bins(library: str) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table +@param_name +def test_hist_small_bins(name: EagerName) -> None: + constructor_eager = maybe_name_to_constructor(name) s = 
nw.from_native(constructor_eager({"values": [1, 2, 3]})) result = s["values"].hist(bins=None, bin_count=None) assert len(result) == 10 @@ -323,24 +275,13 @@ def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None: POLARS_VERSION < (1, 27), reason="polars cannot be used for compatibility checks since narwhals aims to mimic polars>=1.27 behavior", ) -@param_library +@param_name @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.slow def test_hist_bin_hypotheis( - library: str, data: list[float], bin_deltas: list[float] + name: EagerName, data: list[float], bin_deltas: list[float] ) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table + constructor_eager = maybe_name_to_constructor(name) pytest.importorskip("polars") import polars as pl @@ -376,25 +317,15 @@ def test_hist_bin_hypotheis( reason="polars cannot be used for compatibility checks since narwhals aims to mimic polars>=1.27 behavior", ) @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") -@param_library +@param_name @pytest.mark.slow def test_hist_count_hypothesis( - library: str, data: list[float], bin_count: int, request: pytest.FixtureRequest + name: EagerName, data: list[float], bin_count: int, request: pytest.FixtureRequest ) -> None: pytest.importorskip("polars") import polars as pl - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table + constructor_eager = maybe_name_to_constructor(name) 
df = nw.from_native(constructor_eager({"values": data})).select( nw.col("values").cast(nw.Float64) ) diff --git a/tests/series_only/is_sorted_test.py b/tests/series_only/is_sorted_test.py index 046669aac0..4efddf542f 100644 --- a/tests/series_only/is_sorted_test.py +++ b/tests/series_only/is_sorted_test.py @@ -16,7 +16,7 @@ ) def test_is_sorted( constructor_eager: ConstructorEager, - input_data: str, + input_data: list[int], descending: bool, # noqa: FBT001 expected: bool, # noqa: FBT001 ) -> None: diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py index c1b3b4e357..9b0f813b2f 100644 --- a/tests/testing/assert_frame_equal_test.py +++ b/tests/testing/assert_frame_equal_test.py @@ -42,8 +42,7 @@ def test_implementation_mismatch() -> None: with _assertion_error("implementation mismatch"): assert_frame_equal( - nw.from_native(pd.DataFrame({"a": [1]})), - nw.from_native(pa.table({"a": [1]})), # type: ignore[type-var] # pyright: ignore[reportArgumentType] + nw.from_native(pd.DataFrame({"a": [1]})), nw.from_native(pa.table({"a": [1]})) ) diff --git a/tests/testing/constructors_test.py b/tests/testing/constructors_test.py new file mode 100644 index 0000000000..af22cee8a1 --- /dev/null +++ b/tests/testing/constructors_test.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw +from narwhals._utils import Implementation +from narwhals.testing.constructors import ( + available_backends, + get_backend_constructor, + prepare_backends, +) + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + PropertyName: TypeAlias = str + TrueNames: TypeAlias = set[str] + FalseNames: TypeAlias = set[str] + + +def test_eager_returns_eager_frame() -> None: + c = get_backend_constructor("pandas") + if not c.is_available: + pytest.skip() + + df = nw.from_native(c({"x": [1, 2, 3]})) + assert isinstance(df, nw.DataFrame) + + +def test_lazy_returns_lazy_frame() -> 
None: + c = get_backend_constructor("polars[lazy]") + if not c.is_available: + pytest.skip() + + lf = nw.from_native(c({"x": [1, 2, 3]})) + assert isinstance(lf, nw.LazyFrame) + + +_IS_PROPERTY_CASES: list[tuple[PropertyName, TrueNames, FalseNames]] = [ + ("is_pandas", {"pandas", "pandas[nullable]", "pandas[pyarrow]"}, {"polars[eager]"}), + ("is_modin", {"modin", "modin[pyarrow]"}, {"pandas"}), + ("is_cudf", {"cudf"}, {"pandas"}), + ("is_pandas_like", {"pandas", "modin", "cudf"}, {"polars[eager]"}), + ("is_polars", {"polars[eager]", "polars[lazy]"}, {"pandas"}), + ("is_pyarrow", {"pyarrow"}, {"pandas"}), + ("is_dask", {"dask"}, {"pandas"}), + ("is_duckdb", {"duckdb"}, {"pandas"}), + ("is_pyspark", {"pyspark", "pyspark[connect]"}, {"pandas"}), + ("is_sqlframe", {"sqlframe"}, {"pandas"}), + ("is_ibis", {"ibis"}, {"pandas"}), + ("is_spark_like", {"pyspark", "sqlframe", "pyspark[connect]"}, {"pandas"}), + ("is_lazy", {"polars[lazy]", "dask", "duckdb"}, {"pandas"}), + ("needs_pyarrow", {"pyarrow", "duckdb", "ibis"}, {"pandas"}), + ("is_nullable", {"polars[eager]"}, {"pandas", "modin", "dask"}), +] + + +@pytest.mark.parametrize(("prop", "true_names", "false_names"), _IS_PROPERTY_CASES) +def test_constructor_is_properties( + prop: str, true_names: TrueNames, false_names: FalseNames +) -> None: + for name in true_names: + c = get_backend_constructor(name) + assert getattr(c, prop), f"{name}.{prop} should be True" + for name in false_names: + c = get_backend_constructor(name) + assert not getattr(c, prop), f"{name}.{prop} should be False" + + +def test_constructor_implementation() -> None: + assert get_backend_constructor("pandas").implementation is Implementation.PANDAS + assert ( + get_backend_constructor("pandas[pyarrow]").implementation is Implementation.PANDAS + ) + assert ( + get_backend_constructor("polars[eager]").implementation is Implementation.POLARS + ) + assert ( + get_backend_constructor("pyspark[connect]").implementation + is Implementation.PYSPARK_CONNECT + 
) + + +def test_constructor_dunder() -> None: + c1 = get_backend_constructor("pandas") + c2 = get_backend_constructor("pandas") + assert c1.identifier == "pandas" + assert c1 == c2 + assert hash(c1) == hash(c2) + assert c1 != get_backend_constructor("polars[eager]") + assert c1 != "not a constructor" + + +def test_get_backend_constructor_invalid_name() -> None: + with pytest.raises(ValueError, match="Unknown constructor"): + get_backend_constructor("not_a_backend") + + +@pytest.mark.parametrize( + ("include", "exclude", "expected"), + [ + (None, None, available_backends()), + (None, ["pandas"], available_backends() - {"pandas"}), + (["pandas", "polars[eager]"], None, {"pandas", "polars[eager]"}), + (["pandas", "polars[eager]"], ["pandas"], {"polars[eager]"}), + ([], None, frozenset()), + ], +) +def test_prepare_backends( + include: list[str] | None, exclude: list[str] | None, expected: frozenset[str] +) -> None: + for name in (*(include or ()), *(exclude or ())): + if not get_backend_constructor(name).is_available: + pytest.skip(f"{name} not installed") + result = prepare_backends(include=include, exclude=exclude) + assert {c.name for c in result} == expected + + +@pytest.mark.parametrize("kwarg", ["include", "exclude"]) +def test_prepare_backends_unknown_name_raises(kwarg: str) -> None: + with pytest.raises(ValueError, match="not known constructors"): + prepare_backends(**{kwarg: ["not_a_backend"]}) diff --git a/tests/testing/plugin_test.py b/tests/testing/plugin_test.py new file mode 100644 index 0000000000..9f7b6e45a8 --- /dev/null +++ b/tests/testing/plugin_test.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import pytest + +pytest_plugins = ["pytester"] + + +def test_constructor_eager_fixture_runs_for_each_backend( + pytester: pytest.Pytester, +) -> None: + pytest.importorskip("pandas") + pytest.importorskip("polars") + pytest.importorskip("pyarrow") + + pytester.makeconftest("") + pytester.makepyfile(""" + import narwhals as nw + from 
narwhals.testing.typing import DataFrameConstructor + + def test_shape(nw_dataframe: DataFrameConstructor) -> None: + df = nw.from_native(nw_dataframe({"x": [1, 2, 3]}), eager_only=True) + assert df.shape == (3, 1) + """) + result = pytester.runpytest_subprocess( + "-v", "-p", "no:randomly", "--nw-backends=pandas,polars[eager],pyarrow" + ) + result.assert_outcomes(passed=3) + result.stdout.fnmatch_lines( + [ + "*test_shape?pandas?*", + "*test_shape?polars[[]eager[]]?*", + "*test_shape?pyarrow?*", + ] + ) + + +def test_constructor_fixture_includes_lazy_backends(pytester: pytest.Pytester) -> None: + pytest.importorskip("pandas") + pytest.importorskip("polars") + pytest.importorskip("duckdb") + + pytester.makeconftest("") + pytester.makepyfile(""" + import narwhals as nw + from narwhals.testing.typing import FrameConstructor + + def test_columns(nw_frame: FrameConstructor) -> None: + df = nw.from_native(nw_frame({"x": [1, 2, 3]})) + assert df.collect_schema().names() == ["x"] + """) + result = pytester.runpytest_subprocess( + "-v", "--nw-backends=pandas,polars[lazy],duckdb" + ) + result.assert_outcomes(passed=3) + + +def test_external_constructor_disables_parametrisation(pytester: pytest.Pytester) -> None: + pytester.makeconftest("") + pytester.makepyfile(""" + from narwhals.testing.typing import DataFrameConstructor + + def test_unparam(nw_dataframe: DataFrameConstructor) -> None: + pass + """) + result = pytester.runpytest_subprocess("--use-external-nw-backend") + # Without external parametrisation in place, the fixture is missing. 
+ result.assert_outcomes(errors=1) diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 8d076699c0..a0d94b99fb 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -30,7 +30,7 @@ import narwhals as nw from narwhals._utils import Version -from tests.conftest import sqlframe_pyspark_lazy_constructor +from narwhals.testing.constructors import get_backend_constructor from tests.utils import Constructor, maybe_get_modin_df if TYPE_CHECKING: @@ -294,10 +294,10 @@ def test_eager_only_lazy_dask(eager_only: Any, context: Any) -> None: def test_series_only_sqlframe() -> None: # pragma: no cover pytest.importorskip("sqlframe") - df = sqlframe_pyspark_lazy_constructor(data) + df = get_backend_constructor("sqlframe")(data) with pytest.raises(TypeError, match="Cannot only use `series_only`"): - nw.from_native(df, series_only=True) # pyright: ignore[reportArgumentType, reportCallIssue] + nw.from_native(df, series_only=True) # type: ignore[call-overload] @pytest.mark.parametrize( @@ -315,7 +315,7 @@ def test_series_only_sqlframe() -> None: # pragma: no cover ) def test_eager_only_sqlframe(eager_only: Any, context: Any) -> None: # pragma: no cover pytest.importorskip("sqlframe") - df = sqlframe_pyspark_lazy_constructor(data) + df = get_backend_constructor("sqlframe")(data) with context: res = nw.from_native(df, eager_only=eager_only) diff --git a/tests/translate/get_native_namespace_test.py b/tests/translate/get_native_namespace_test.py index 821443ea64..5a15069ed2 100644 --- a/tests/translate/get_native_namespace_test.py +++ b/tests/translate/get_native_namespace_test.py @@ -76,7 +76,7 @@ def test_native_namespace_frame(constructor: Constructor) -> None: def test_native_namespace_series(constructor_eager: ConstructorEager) -> None: - constructor_name = constructor_eager.__name__ + constructor_name = str(constructor_eager) expected_namespace = _get_expected_namespace(constructor_name=constructor_name) 
diff --git a/tests/utils.py b/tests/utils.py index 4d01223b2a..3281951896 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,25 +6,32 @@ import warnings from datetime import date, datetime from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, Callable import pytest import narwhals as nw from narwhals._utils import Implementation, parse_version, zip_strict from narwhals.dependencies import get_pandas +from narwhals.testing.typing import ( + # TODO(FBruzzesi): Remove these aliases once all the test suite migrates to *FrameConstructor's + DataFrameConstructor as ConstructorEager, + FrameConstructor as Constructor, +) from narwhals.translate import from_native if TYPE_CHECKING: from collections.abc import Mapping, Sequence import pandas as pd - from pyspark.sql import SparkSession - from sqlframe.duckdb import DuckDBSession from typing_extensions import TypeAlias - from narwhals._native import NativeLazyFrame - from narwhals.typing import Frame, IntoDataFrame, TimeUnit + from narwhals.typing import Frame, TimeUnit + +# TODO(FBruzzesi): Remove these aliases once all the test suite migrates to *FrameConstructor's +# NOTE: Explicitly exported otherwise mypy will raise an [attr-defined] error for each file +# importing them from `tests.utils` rather than `narwhals.testing.typing` directly. +__all__ = ("Constructor", "ConstructorEager") def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -44,9 +51,6 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] 
= get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | IntoDataFrame"] -ConstructorEager: TypeAlias = Callable[[Any], "IntoDataFrame"] -ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] ConstructorPandasLike: TypeAlias = Callable[[Any], "pd.DataFrame"] NestedOrEnumDType: TypeAlias = "nw.List | nw.Array | nw.Struct | nw.Enum" @@ -174,33 +178,6 @@ def assert_equal_hash(left: Any, right: Any) -> None: ) -def sqlframe_session() -> DuckDBSession: - from sqlframe.duckdb import DuckDBSession - - # NOTE: `__new__` override inferred by `pyright` only - # https://github.com/eakmanrq/sqlframe/blob/772b3a6bfe5a1ffd569b7749d84bea2f3a314510/sqlframe/base/session.py#L181-L184 - return cast("DuckDBSession", DuckDBSession()) # type: ignore[redundant-cast] - - -def pyspark_session() -> SparkSession: # pragma: no cover - if is_spark_connect := os.environ.get("SPARK_CONNECT", None): - from pyspark.sql.connect.session import SparkSession - else: - from pyspark.sql import SparkSession - builder = cast("SparkSession.Builder", SparkSession.builder).appName("unit-tests") - builder = ( - builder.remote(f"sc://localhost:{os.environ.get('SPARK_PORT', '15002')}") - if is_spark_connect - else builder.master("local[1]").config("spark.ui.enabled", "false") - ) - return ( - builder.config("spark.default.parallelism", "1") - .config("spark.sql.shuffle.partitions", "2") - .config("spark.sql.session.timeZone", "UTC") - .getOrCreate() - ) - - def maybe_get_modin_df(df_pandas: pd.DataFrame) -> Any: # pragma: no cover """Convert a pandas DataFrame to a Modin DataFrame if Modin is available.""" try: @@ -230,10 +207,7 @@ def is_pyarrow_windows_no_tzdata(constructor: Constructor, /) -> bool: def uses_pyarrow_backend(constructor: Constructor | ConstructorEager) -> bool: """Checks if the pandas-like constructor uses pyarrow backend.""" - return constructor.__name__ in { - "pandas_pyarrow_constructor", - "modin_pyarrow_constructor", - } + return 
str(constructor) in {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"} def maybe_collect(df: Frame) -> Frame: diff --git a/tpch/tests/conftest.py b/tpch/tests/conftest.py index d98c4b401a..499571a567 100644 --- a/tpch/tests/conftest.py +++ b/tpch/tests/conftest.py @@ -36,13 +36,6 @@ def pytest_configure(config: pytest.Config) -> None: def pytest_addoption(parser: pytest.Parser) -> None: - from tests.conftest import DEFAULT_CONSTRUCTORS - - parser.addoption( - "--constructors", - default=DEFAULT_CONSTRUCTORS, - help="", - ) parser.addoption( "--scale-factor", default=constants.SCALE_FACTOR_DEFAULT, diff --git a/utils/import_check.py b/utils/import_check.py index d292b40790..d97b488509 100644 --- a/utils/import_check.py +++ b/utils/import_check.py @@ -27,6 +27,20 @@ "_polars": {"polars"}, "_duckdb": {"duckdb"}, "_ibis": {"ibis", "ibis._", "ibis.expr.types"}, + # narwhals.testing constructors deliberately lazy-import every supported + # backend inside `__call__` so test fixtures can build native frames. + "testing": { + "cudf", + "dask", + "dask.dataframe", + "duckdb", + "ibis", + "modin", + "pandas", + "polars", + "pyarrow", + "pyspark", + }, } diff --git a/utils/sort_api_reference.py b/utils/sort_api_reference.py index 1b417ed63a..243ccbcd6d 100644 --- a/utils/sort_api_reference.py +++ b/utils/sort_api_reference.py @@ -42,7 +42,7 @@ def sort_list(match: re.Match[str]) -> str: PATH = Path("docs") / "api-reference" -FILES_TO_SKIP = {"dtypes", "typing"} +FILES_TO_SKIP = {"dtypes", "typing", "testing"} ret = max( sort_members_in_markdown(file_path=file_path)