Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
- top_k
- unique
- unpivot
- with_backend
- with_columns
- with_row_index
- write_csv
Expand Down
14 changes: 14 additions & 0 deletions docs/basics/dataframe_conversion.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,17 @@ print(df_to_polars(df_duckdb))
```

It works to pass Polars to `backend` here because Polars supports the [PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for import.

## Switching eager backend while staying in Narwhals

`to_pandas`, `to_polars` and `to_arrow` return the native object, taking you out of the Narwhals layer.
If you want to keep chaining Narwhals methods, use `DataFrame.with_backend` instead:

```python exec="yes" source="above" session="conversion" result="python"
df = nw.from_native(df_pandas)
print(df.with_backend("polars").implementation)
print(df.with_backend("pyarrow").implementation)
```

`with_backend` accepts a string (`"pandas"`, `"modin"`, `"cudf"`, `"pyarrow"`, `"polars"`),
an `Implementation` member (for example `Implementation.POLARS`), or the backend module itself (for example `polars`).
21 changes: 20 additions & 1 deletion narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from narwhals._typing_compat import assert_never
from narwhals._utils import (
Implementation,
ValidateBackendVersion,
Version,
_StoresNative,
Expand Down Expand Up @@ -56,7 +57,7 @@
from narwhals._spark_like.utils import SparkSession
from narwhals._translate import IntoArrowTable
from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl
from narwhals._utils import Implementation, _LimitedContext
from narwhals._utils import _LimitedContext
from narwhals.dataframe import DataFrame
from narwhals.dtypes import DType
from narwhals.exceptions import ColumnNotFoundError
Expand Down Expand Up @@ -242,6 +243,7 @@ def is_unique(self) -> CompliantSeriesT: ...
# Interface stub (the body is `...`): returns a compliant lazy frame.
# `backend` may be None and `session` is keyword-only and may also be None;
# how each is interpreted is left to the implementers and is not visible here.
def lazy(
self, backend: _LazyAllowedImpl | None, *, session: SparkSession | None
) -> CompliantLazyFrameAny: ...
# Interface stub (the body is `...`): returns a compliant dataframe for the
# requested eager-allowed implementation.
def with_backend(self, backend: _EagerAllowedImpl) -> CompliantDataFrameAny: ...
def pivot(
self,
on: Sequence[str],
Expand Down Expand Up @@ -332,6 +334,23 @@ def __narwhals_namespace__(
def to_narwhals(self) -> DataFrame[NativeDataFrameT]:
    """Wrap this compliant frame in a Narwhals `DataFrame` built by its version.

    The wrapper is always created with `level="full"`.
    """
    make_dataframe = self._version.dataframe
    return make_dataframe(self, level="full")

def with_backend(self, backend: _EagerAllowedImpl) -> CompliantDataFrameAny:
    """Re-wrap this frame's data in the compliant frame of another eager backend.

    Arguments:
        backend: The eager implementation to convert to.

    Returns:
        `self` when `backend` is already this frame's implementation.
        Otherwise a new compliant frame: Polars and PyArrow targets are built
        from `to_polars()` / `to_arrow()`, and every other target is built
        from `to_pandas()` via `PandasLikeDataFrame._from_pandas`.
    """
    if self._implementation is backend:
        # Nothing to convert.
        return self

    target = self._version.namespace.from_backend(backend).compliant

    # Backend modules are imported lazily, only for the branch that needs them.
    if backend is Implementation.PYARROW:
        from narwhals._arrow.dataframe import ArrowDataFrame

        return ArrowDataFrame.from_native(self.to_arrow(), context=target)

    if backend is Implementation.POLARS:
        from narwhals._polars.dataframe import PolarsDataFrame

        return PolarsDataFrame.from_native(self.to_polars(), context=target)

    # Remaining targets go through a native pandas DataFrame.
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame

    as_pandas = self.to_pandas()
    return PandasLikeDataFrame._from_pandas(as_pandas, context=target)

def aggregate(self, *exprs: EagerExprT) -> Self: # pyright: ignore[reportIncompatibleMethodOverride]
# NOTE: Ignore intermittent [False Negative] (1)
# Method "aggregate" overrides class "CompliantLazyFrame" in an incompatible manner
Expand Down
20 changes: 20 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,26 @@ def from_arrow(cls, data: IntoArrowTable, /, *, context: _LimitedContext) -> Sel
raise AssertionError(msg)
return cls.from_native(native, context=context)

@classmethod
def _from_pandas(cls, data: pd.DataFrame, /, *, context: _LimitedContext) -> Self:
    """Build a frame for `context`'s implementation from a native pandas DataFrame.

    Arguments:
        data: Native pandas DataFrame to start from.
        context: Supplies the target implementation (`context._implementation`)
            and is forwarded to `from_native`.

    Returns:
        The result of `cls.from_native` on the converted native object:
        `data` itself for pandas, `DataFrame(data)` from the native namespace
        for Modin, and `from_pandas(data)` from the native namespace for cuDF.

    Raises:
        ValueError: If the implementation is not pandas, Modin or cuDF.
    """
    target_impl = context._implementation
    if target_impl.is_pandas():
        converted = data
    elif target_impl.is_modin():
        converted = target_impl.to_native_namespace().DataFrame(data)
    elif target_impl.is_cudf():  # pragma: no cover
        converted = target_impl.to_native_namespace().from_pandas(data)
    else:  # pragma: no cover
        msg = (
            "Can't instantiate PandasLikeDataFrame from pandas DataFrame and "
            f"implementation {target_impl}"
        )
        raise ValueError(msg)
    return cls.from_native(converted, context=context)

@classmethod
def from_dict(
cls,
Expand Down
15 changes: 15 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,21 @@ def iter_columns(self) -> Iterator[PolarsSeries]:
for series in self.native.iter_columns():
yield PolarsSeries.from_native(series, context=self)

def with_backend(self, backend: _EagerAllowedImpl) -> CompliantDataFrameAny:
    """Re-wrap the native Polars frame in the compliant frame of another eager backend.

    Arguments:
        backend: The eager implementation to convert to.

    Returns:
        A new `PolarsDataFrame` around the same native frame when `backend`
        is Polars, an `ArrowDataFrame` built from `native.to_arrow()` for
        PyArrow, and otherwise a `PandasLikeDataFrame` built from
        `native.to_pandas()`.
    """
    frame = self.native
    if backend is Implementation.POLARS:
        return PolarsDataFrame.from_native(frame, context=self)

    target = self._version.namespace.from_backend(backend).compliant

    if backend is not Implementation.PYARROW:
        # Every non-Arrow target goes through a native pandas DataFrame.
        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        return PandasLikeDataFrame._from_pandas(frame.to_pandas(), context=target)

    from narwhals._arrow.dataframe import ArrowDataFrame

    return ArrowDataFrame.from_native(frame.to_arrow(), context=target)

def lazy(
self,
backend: _LazyAllowedImpl | None = None,
Expand Down
51 changes: 50 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@
check_expressions_preserve_length,
is_scalar_like,
)
from narwhals._typing import Arrow, Pandas, _LazyAllowedImpl, _LazyFrameCollectImpl
from narwhals._typing import (
Arrow,
Pandas,
_EagerAllowedImpl,
_LazyAllowedImpl,
_LazyFrameCollectImpl,
)
from narwhals._utils import (
Implementation,
Version,
Expand Down Expand Up @@ -909,6 +915,49 @@ def to_pandas(self) -> pd.DataFrame:
"""
return self._compliant_frame.to_pandas()

def with_backend(self, backend: IntoBackend[EagerAllowed]) -> DataFrame[Any]:
    """Convert the underlying native dataframe to another eager backend.

    Unlike `to_pandas`, `to_polars` and `to_arrow`, the result is still a
    Narwhals DataFrame, so Narwhals methods can keep being chained on it.

    Arguments:
        backend: The target eager backend. `backend` can be specified in various ways:

            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`,
                `MODIN`, `CUDF`, `PYARROW` or `POLARS`.
            - As a string: `"pandas"`, `"modin"`, `"cudf"`, `"pyarrow"` or `"polars"`.
            - Directly as a module `pandas`, `modin.pandas`, `cudf`, `pyarrow` or `polars`.

    Returns:
        A Narwhals DataFrame backed by the requested library.

    Raises:
        ValueError: If `backend` does not resolve to an eager-allowed implementation.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 6]})
        >>> df = nw.from_native(df_native)
        >>> df.with_backend("polars")
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  shape: (2, 2)   |
        |  ┌─────┬─────┐   |
        |  │ a   ┆ b   │   |
        |  │ --- ┆ --- │   |
        |  │ i64 ┆ i64 │   |
        |  ╞═════╪═════╡   |
        |  │ 1   ┆ 4   │   |
        |  │ 2   ┆ 6   │   |
        |  └─────┴─────┘   |
        └──────────────────┘
    """
    eager_backend = Implementation.from_backend(backend)
    if is_eager_allowed(eager_backend):
        converted = self._compliant_frame.with_backend(eager_backend)
        return self._with_compliant(converted)

    # Anything that is not an eager-allowed implementation is rejected here,
    # before the compliant layer is touched.
    msg = (
        f"Unsupported `backend` value.\nExpected one of "
        f"{get_args(_EagerAllowedImpl)}, got: {eager_backend}."
    )
    raise ValueError(msg)

def to_polars(self) -> pl.DataFrame:
"""Convert this DataFrame to a polars DataFrame.

Expand Down
69 changes: 69 additions & 0 deletions tests/frame/with_backend_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING

import pytest

import narwhals as nw
from narwhals._utils import Implementation
from tests.utils import assert_equal_data

if TYPE_CHECKING:
from narwhals._typing import EagerAllowed
from tests.utils import ConstructorEager


# Shared input for the tests in this module: one integer column, one string column.
data = {"a": [1, 2, 3], "b": ["x", "y", "z"]}


@pytest.mark.slow
@pytest.mark.parametrize(
    "backend",
    [
        Implementation.PANDAS,
        Implementation.MODIN,
        Implementation.CUDF,
        Implementation.POLARS,
        Implementation.PYARROW,
        "pandas",
        "polars",
        "pyarrow",
        "modin",
        "cudf",
    ],
)
def test_with_backend(constructor_eager: ConstructorEager, backend: EagerAllowed) -> None:
    """Each eager backend spec yields a Narwhals DataFrame on that backend with equal data."""
    expected_impl = Implementation.from_backend(backend)
    # Skip when the target library cannot be imported.
    pytest.importorskip(expected_impl.name.lower())

    frame = nw.from_native(constructor_eager(data), eager_only=True)
    converted = frame.with_backend(backend=backend)

    assert isinstance(converted, nw.DataFrame)
    assert converted.implementation == expected_impl
    assert_equal_data(converted, data)


@pytest.mark.parametrize(
    "backend",
    [
        Implementation.DUCKDB,
        Implementation.DASK,
        Implementation.IBIS,
        Implementation.PYSPARK,
        Implementation.SQLFRAME,
        "duckdb",
        "dask",
        "ibis",
        "pyspark",
        "sqlframe",
        "garbage",
    ],
)
def test_with_backend_invalid(
    constructor_eager: ConstructorEager, backend: str | Implementation
) -> None:
    """Lazy-only implementations and unrecognised strings are rejected with ValueError."""
    expected_message = re.escape("Unsupported `backend` value")
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    with pytest.raises(ValueError, match=expected_message):
        frame.with_backend(backend=backend)  # type: ignore[arg-type]
Loading