diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md index b21ce3cea9..276695aed8 100644 --- a/docs/api-reference/dataframe.md +++ b/docs/api-reference/dataframe.md @@ -53,6 +53,7 @@ - top_k - unique - unpivot + - with_backend - with_columns - with_row_index - write_csv diff --git a/docs/basics/dataframe_conversion.md b/docs/basics/dataframe_conversion.md index ebc072a010..0aec769ae5 100644 --- a/docs/basics/dataframe_conversion.md +++ b/docs/basics/dataframe_conversion.md @@ -63,3 +63,17 @@ print(df_to_polars(df_duckdb)) ``` It works to pass Polars to `backend` here because Polars supports the [PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for import. + +## Switching eager backend while staying in Narwhals + +`to_pandas`, `to_polars` and `to_arrow` return the native object, taking you out of the Narwhals layer. +If you want to keep chaining Narwhals methods, use `DataFrame.with_backend` instead: + +```python exec="yes" source="above" session="conversion" result="python" +df = nw.from_native(df_pandas) +print(df.with_backend("polars").implementation) +print(df.with_backend("pyarrow").implementation) +``` + +`with_backend` accepts a string (`"pandas"`, `"modin"`, `"cudf"`, `"pyarrow"`, `"polars"`), +an `Implementation`, or a module. 
diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 3e7810616c..0869533b54 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -26,6 +26,7 @@ ) from narwhals._typing_compat import assert_never from narwhals._utils import ( + Implementation, ValidateBackendVersion, Version, _StoresNative, @@ -56,7 +57,7 @@ from narwhals._spark_like.utils import SparkSession from narwhals._translate import IntoArrowTable from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl - from narwhals._utils import Implementation, _LimitedContext + from narwhals._utils import _LimitedContext from narwhals.dataframe import DataFrame from narwhals.dtypes import DType from narwhals.exceptions import ColumnNotFoundError @@ -242,6 +243,7 @@ def is_unique(self) -> CompliantSeriesT: ... def lazy( self, backend: _LazyAllowedImpl | None, *, session: SparkSession | None ) -> CompliantLazyFrameAny: ... + def with_backend(self, backend: _EagerAllowedImpl) -> CompliantDataFrameAny: ... 
def pivot( self, on: Sequence[str], @@ -332,6 +334,23 @@ def __narwhals_namespace__( def to_narwhals(self) -> DataFrame[NativeDataFrameT]: return self._version.dataframe(self, level="full") + def with_backend(self, backend: _EagerAllowedImpl) -> CompliantDataFrameAny: + if backend is self._implementation: + return self + ns = self._version.namespace.from_backend(backend).compliant + if backend is Implementation.POLARS: + from narwhals._polars.dataframe import PolarsDataFrame + + return PolarsDataFrame.from_native(self.to_polars(), context=ns) + if backend is Implementation.PYARROW: + from narwhals._arrow.dataframe import ArrowDataFrame + + return ArrowDataFrame.from_native(self.to_arrow(), context=ns) + + from narwhals._pandas_like.dataframe import PandasLikeDataFrame + + return PandasLikeDataFrame._from_pandas(self.to_pandas(), context=ns) + def aggregate(self, *exprs: EagerExprT) -> Self: # pyright: ignore[reportIncompatibleMethodOverride] # NOTE: Ignore intermittent [False Negative] (1) # Method "aggregate" overrides class "CompliantLazyFrame" in an incompatible manner diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index d2afe6b2ce..3679039c39 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -143,6 +143,26 @@ def from_arrow(cls, data: IntoArrowTable, /, *, context: _LimitedContext) -> Sel raise AssertionError(msg) return cls.from_native(native, context=context) + @classmethod + def _from_pandas(cls, data: pd.DataFrame, /, *, context: _LimitedContext) -> Self: + """Construct a pandas-like frame from a native pandas DataFrame.""" + impl = context._implementation + if impl.is_pandas(): + native = data + elif impl.is_modin(): + ns = impl.to_native_namespace() + native = ns.DataFrame(data) + elif impl.is_cudf(): # pragma: no cover + ns = impl.to_native_namespace() + native = ns.from_pandas(data) + else: # pragma: no cover + msg = ( + "Can't instantiate PandasLikeDataFrame from pandas 
DataFrame and " + f"implementation {impl}" + ) + raise ValueError(msg) + return cls.from_native(native, context=context) + @classmethod def from_dict( cls, diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 0848967e9a..49a549c8a5 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -526,6 +526,21 @@ def iter_columns(self) -> Iterator[PolarsSeries]: for series in self.native.iter_columns(): yield PolarsSeries.from_native(series, context=self) + def with_backend(self, backend: _EagerAllowedImpl) -> CompliantDataFrameAny: + native = self.native + if backend is Implementation.POLARS: + return PolarsDataFrame.from_native(native, context=self) + + ns = self._version.namespace.from_backend(backend).compliant + if backend is Implementation.PYARROW: + from narwhals._arrow.dataframe import ArrowDataFrame + + return ArrowDataFrame.from_native(native.to_arrow(), context=ns) + + from narwhals._pandas_like.dataframe import PandasLikeDataFrame + + return PandasLikeDataFrame._from_pandas(native.to_pandas(), context=ns) + def lazy( self, backend: _LazyAllowedImpl | None = None, diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index bd602efd52..d18b2587f2 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -22,7 +22,13 @@ check_expressions_preserve_length, is_scalar_like, ) -from narwhals._typing import Arrow, Pandas, _LazyAllowedImpl, _LazyFrameCollectImpl +from narwhals._typing import ( + Arrow, + Pandas, + _EagerAllowedImpl, + _LazyAllowedImpl, + _LazyFrameCollectImpl, +) from narwhals._utils import ( Implementation, Version, @@ -909,6 +915,49 @@ def to_pandas(self) -> pd.DataFrame: """ return self._compliant_frame.to_pandas() + def with_backend(self, backend: IntoBackend[EagerAllowed]) -> DataFrame[Any]: + """Converts the underlying native dataframe to the target backend without leaving the Narwhals layer. + + Arguments: + backend: The target eager backend. 
`backend` can be specified in various ways: + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, + `MODIN`, `CUDF`, `PYARROW` or `POLARS`. + - As a string: `"pandas"`, `"modin"`, `"cudf"`, `"pyarrow"` or `"polars"`. + - Directly as a module `pandas`, `modin.pandas`, `cudf`, `pyarrow` or `polars`. + + Returns: + A Narwhals DataFrame backed by the requested library. + + Examples: + >>> import pandas as pd + >>> import narwhals as nw + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 6]}) + >>> df = nw.from_native(df_native) + >>> df.with_backend("polars") + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (2, 2) | + | ┌─────┬─────┐ | + | │ a ┆ b │ | + | │ --- ┆ --- │ | + | │ i64 ┆ i64 │ | + | ╞═════╪═════╡ | + | │ 1 ┆ 4 │ | + | │ 2 ┆ 6 │ | + | └─────┴─────┘ | + └──────────────────┘ + """ + eager_backend = Implementation.from_backend(backend) + if not is_eager_allowed(eager_backend): + msg = ( + f"Unsupported `backend` value.\nExpected one of " + f"{get_args(_EagerAllowedImpl)}, got: {eager_backend}." + ) + raise ValueError(msg) + return self._with_compliant(self._compliant_frame.with_backend(eager_backend)) + def to_polars(self) -> pl.DataFrame: """Convert this DataFrame to a polars DataFrame. 
diff --git a/tests/frame/with_backend_test.py b/tests/frame/with_backend_test.py new file mode 100644 index 0000000000..3a5bf0e60e --- /dev/null +++ b/tests/frame/with_backend_test.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw +from narwhals._utils import Implementation +from tests.utils import assert_equal_data + +if TYPE_CHECKING: + from narwhals._typing import EagerAllowed + from tests.utils import ConstructorEager + + +data = {"a": [1, 2, 3], "b": ["x", "y", "z"]} + + +@pytest.mark.slow +@pytest.mark.parametrize( + "backend", + [ + Implementation.PANDAS, + Implementation.MODIN, + Implementation.CUDF, + Implementation.POLARS, + Implementation.PYARROW, + "pandas", + "polars", + "pyarrow", + "modin", + "cudf", + ], +) +def test_with_backend(constructor_eager: ConstructorEager, backend: EagerAllowed) -> None: + impl = Implementation.from_backend(backend) + pytest.importorskip(impl.name.lower()) + + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df.with_backend(backend=backend) + + assert isinstance(result, nw.DataFrame) + assert result.implementation == impl + assert_equal_data(result, data) + + +@pytest.mark.parametrize( + "backend", + [ + Implementation.DUCKDB, + Implementation.DASK, + Implementation.IBIS, + Implementation.PYSPARK, + Implementation.SQLFRAME, + "duckdb", + "dask", + "ibis", + "pyspark", + "sqlframe", + "garbage", + ], +) +def test_with_backend_invalid( + constructor_eager: ConstructorEager, backend: str | Implementation +) -> None: + df = nw.from_native(constructor_eager(data), eager_only=True) + with pytest.raises(ValueError, match=re.escape("Unsupported `backend` value")): + df.with_backend(backend=backend) # type: ignore[arg-type]