Skip to content
1 change: 1 addition & 0 deletions docs/api-reference/expr_struct.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
options:
members:
- field
- unnest
show_source: false
show_bases: false
1 change: 1 addition & 0 deletions docs/api-reference/series_struct.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
options:
members:
- field
- unnest
show_source: false
show_bases: false
18 changes: 18 additions & 0 deletions narwhals/_arrow/series_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,33 @@

from typing import TYPE_CHECKING

import pyarrow as pa
import pyarrow.compute as pc

from narwhals._arrow.utils import ArrowSeriesNamespace
from narwhals._compliant.any_namespace import StructNamespace

if TYPE_CHECKING:
from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.series import ArrowSeries


class ArrowSeriesStructNamespace(ArrowSeriesNamespace, StructNamespace["ArrowSeries"]):
    """`struct` accessor for PyArrow-backed series."""

    def field(self, name: str) -> ArrowSeries:
        """Extract the struct field `name` as a series aliased to `name`."""
        extracted = pc.struct_field(self.native, name)
        return self.with_native(extracted).alias(name)

    def unnest(self) -> ArrowDataFrame:
        """Expand the struct series into a dataframe with one column per field."""
        # Imported locally: at module level this name is only available
        # under `TYPE_CHECKING`.
        from narwhals._arrow.dataframe import ArrowDataFrame

        struct_array = self.native
        dtype: pa.StructType = struct_array.type

        # NOTE: `StructType.names` is not available until pyarrow 18.0.0,
        # so field names are resolved by position instead.
        fields = [dtype.field(position) for position in range(dtype.num_fields)]
        columns = {
            field.name: pc.struct_field(struct_array, position)
            for position, field in enumerate(fields)
        }
        return ArrowDataFrame.from_native(pa.table(columns), context=self.compliant)
3 changes: 2 additions & 1 deletion narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, ClassVar, Protocol, TypeVar
from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeVar

from narwhals._utils import CompliantT_co, _StoresCompliant

Expand Down Expand Up @@ -115,3 +115,4 @@ class StructNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
_accessor: ClassVar[Accessor] = "struct"

# Select the struct field called `name`; the result has the same compliant
# type that this namespace wraps.
def field(self, name: str) -> CompliantT_co: ...
# Expand the struct into one column per field. Typed as `Any`: the series
# namespaces return a dataframe, while the expression namespaces return an
# expression.
def unnest(self) -> Any: ...
32 changes: 31 additions & 1 deletion narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@

from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
from narwhals._compliant.series import CompliantSeries
from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries
from narwhals._compliant.typing import (
AliasNames,
EagerDataFrameAny,
EagerSeriesAny,
EvalNames,
EvalSeries,
)
from narwhals._expression_parsing import ExprMetadata
from narwhals._typing import NoDefault
from narwhals._utils import Implementation, Version, _LimitedContext
Expand Down Expand Up @@ -1184,3 +1190,27 @@ def field(self, name: str) -> EagerExprT:
return self.compliant._reuse_series_namespace("struct", "field", name=name).alias(
name
)

def unnest(self) -> EagerExprT:
    """Expand every struct series produced by the expression into its fields.

    The new expression yields one series per struct field and reports the
    field names as its output names; no alias function is attached.
    """
    compliant = self.compliant

    def inner(df: EagerDataFrameAny) -> list[EagerSeriesAny]:
        # Unnest each struct series into a dataframe, then flatten the
        # columns of those dataframes into one list, preserving order.
        columns: list[EagerSeriesAny] = []
        for struct_series in compliant(df):
            frame: EagerDataFrameAny = struct_series.struct.unnest()
            columns += [frame.get_column(name) for name in frame.columns]
        return columns

    def evaluate_output_names(df: EagerDataFrameAny) -> Sequence[str]:
        # Output names are the field names of each struct dtype, in order.
        names = []
        for struct_series in compliant(df):
            names.extend(field.name for field in struct_series.dtype.fields)
        return names

    return compliant._from_callable(
        inner,
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        context=compliant,
    )
34 changes: 33 additions & 1 deletion narwhals/_duckdb/expr_struct.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import StructNamespace
from narwhals._duckdb.utils import F, lit

if TYPE_CHECKING:
from duckdb import Expression

from narwhals._duckdb.dataframe import DuckDBLazyFrame
from narwhals._duckdb.expr import DuckDBExpr
from narwhals.dtypes import Struct


class DuckDBExprStructNamespace(
Expand All @@ -17,3 +21,31 @@ def field(self, name: str) -> DuckDBExpr:
return self.compliant._with_elementwise(
lambda expr: F("struct_extract", expr, lit(name))
).alias(name)

def unnest(self) -> DuckDBExpr:
    """Expand each struct column of the expression into one column per field.

    Field names and their order are read from the frame's schema, looked up
    under the expression's output names.
    """
    compliant = self.compliant

    def func(df: DuckDBLazyFrame) -> list[Expression]:
        schema = df.schema
        native_exprs = compliant(df)
        output_names = compliant._evaluate_output_names(df)
        extracted: list[Expression] = []
        for native_expr, name in zip(native_exprs, output_names):
            # NOTE(review): the struct dtype is taken from `df.schema[name]`;
            # this presumably assumes the output name refers to a struct
            # column of `df` - confirm for expressions that change dtype.
            struct_dtype = cast("Struct", schema[name])
            extracted.extend(
                F("struct_extract", native_expr, lit(field.name)).alias(field.name)
                for field in struct_dtype.fields
            )
        return extracted

    def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]:
        schema = df.schema
        names: list[str] = []
        for name in compliant._evaluate_output_names(df):
            struct_dtype = cast("Struct", schema[name])
            names.extend(field.name for field in struct_dtype.fields)
        return names

    return compliant.__class__(
        func,
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        version=compliant._version,
    )
33 changes: 32 additions & 1 deletion narwhals/_ibis/expr_struct.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import StructNamespace

if TYPE_CHECKING:
import ibis.expr.types as ir

from narwhals._ibis.dataframe import IbisLazyFrame
from narwhals._ibis.expr import IbisExpr
from narwhals.dtypes import Struct


class IbisExprStructNamespace(LazyExprNamespace["IbisExpr"], StructNamespace["IbisExpr"]):
Expand All @@ -17,3 +19,32 @@ def func(expr: ir.StructColumn) -> ir.Column:
return expr[name]

return self.compliant._with_callable(func).alias(name)

def unnest(self) -> IbisExpr:
    """Expand each struct column of the expression into one column per field.

    The struct fields are taken from the frame's schema entry for each of the
    expression's output names; every extracted column is named after its field.
    """
    compliant = self.compliant

    def func(df: IbisLazyFrame) -> list[ir.Column]:
        schema = df.schema
        native_exprs = compliant(df)
        output_names = compliant._evaluate_output_names(df)
        columns: list[ir.Column] = []
        for native_expr, name in zip(native_exprs, output_names):
            struct_column = cast("ir.StructColumn", native_expr)
            struct_dtype = cast("Struct", schema[name])
            columns.extend(
                struct_column[field.name].name(field.name)
                for field in struct_dtype.fields
            )
        return columns

    def evaluate_output_names(df: IbisLazyFrame) -> list[str]:
        schema = df.schema
        names: list[str] = []
        for name in compliant._evaluate_output_names(df):
            struct_dtype = cast("Struct", schema[name])
            names.extend(field.name for field in struct_dtype.fields)
        return names

    return compliant.__class__(
        func,
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        version=compliant._version,
        implementation=compliant._implementation,
    )
21 changes: 21 additions & 0 deletions narwhals/_pandas_like/series_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from narwhals._pandas_like.utils import PandasLikeSeriesNamespace

if TYPE_CHECKING:
import pyarrow as pa

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.series import PandasLikeSeries


Expand All @@ -14,3 +17,21 @@ class PandasLikeSeriesStructNamespace(
):
def field(self, name: str) -> PandasLikeSeries:
    """Extract the struct field `name` as a series aliased to `name`."""
    extracted = self.native.struct.field(name)
    return self.with_native(extracted).alias(name)

def unnest(self) -> PandasLikeDataFrame:
    """Expand the struct series into a dataframe with one column per field."""
    # Imported locally: at module level this name is only available under
    # `TYPE_CHECKING`.
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame

    series = self.native
    arrow_type: pa.StructType = series.dtype.pyarrow_dtype
    namespace = self.implementation.to_native_namespace()

    # NOTE: `StructType.names` is not available until pyarrow 18.0.0, so
    # field names are resolved by position instead.
    field_names = [
        arrow_type.field(position).name for position in range(arrow_type.num_fields)
    ]
    columns = {
        name: series.struct.field(position)
        for position, name in enumerate(field_names)
    }
    return PandasLikeDataFrame.from_native(
        namespace.DataFrame(columns), context=self.compliant
    )
17 changes: 16 additions & 1 deletion narwhals/_polars/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,4 +521,19 @@ def contains(self, item: Any) -> PolarsExpr:

class PolarsExprStructNamespace(
PolarsExprNamespace, PolarsStructNamespace[PolarsExpr, pl.Expr]
): ...
):
def unnest(self) -> PolarsExpr:
    """Expand the struct expression into one column per field.

    Raises:
        NotImplementedError: If the installed polars is older than 0.20.30.
    """
    expr = self.native
    version = self._expr._backend_version

    # Reject unsupported versions up front so the happy path stays flat.
    if version < (0, 20, 30):  # pragma: no cover
        found = ".".join(map(str, version))
        msg = (
            "`Expr.struct.unnest` is only available in 'polars>=0.20.30',\n"
            f"found version {found!r}."
        )
        raise NotImplementedError(msg)

    if version >= (1, 10, 0):
        unnested = expr.struct.unnest()
    else:  # pragma: no cover
        # Older releases: select every field with the "*" wildcard instead.
        unnested = expr.struct.field("*")
    return self.compliant._with_native(unnested)
7 changes: 6 additions & 1 deletion narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,4 +852,9 @@ def contains(self, item: NonNestedLiteral) -> PolarsSeries:

class PolarsSeriesStructNamespace(
PolarsSeriesNamespace, PolarsStructNamespace[PolarsSeries, pl.Series]
): ...
):
def unnest(self) -> PolarsDataFrame:
    """Expand the struct series into a dataframe with one column per field."""
    from narwhals._polars.dataframe import PolarsDataFrame

    return PolarsDataFrame(
        self.native.struct.unnest(), version=self.compliant._version
    )
1 change: 1 addition & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,4 @@ def len(self) -> CompliantT: ...
class PolarsStructNamespace(PolarsAnyNamespace[CompliantT, NativeT_co]):
    # Declarations shared by the Polars `struct` accessor namespaces.
    # NOTE(review): `Method` is defined outside this view - presumably a
    # callable type used for methods provided by the base class; confirm.
    _accessor: ClassVar[Accessor] = "struct"
    # `field` returns the compliant type the namespace is parameterised with.
    field: Method[CompliantT]
    # `unnest` is typed as `Any`: the expression namespace returns an
    # expression, while the series namespace returns a dataframe.
    unnest: Method[Any]
33 changes: 32 additions & 1 deletion narwhals/_spark_like/expr_struct.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import StructNamespace

if TYPE_CHECKING:
from sqlframe.base.column import Column

from narwhals._spark_like.dataframe import SparkLikeLazyFrame
from narwhals._spark_like.expr import SparkLikeExpr
from narwhals.dtypes import Struct


class SparkLikeExprStructNamespace(
Expand All @@ -19,3 +21,32 @@ def func(expr: Column) -> Column:
return expr.getField(name)

return self.compliant._with_elementwise(func).alias(name)

def unnest(self) -> SparkLikeExpr:
    """Expand each struct column of the expression into one column per field.

    The struct fields are taken from the frame's schema entry for each of the
    expression's output names; every extracted column is aliased to its field
    name.
    """
    compliant = self.compliant

    def func(df: SparkLikeLazyFrame) -> list[Column]:
        schema = df.schema
        pairs = zip(compliant(df), compliant._evaluate_output_names(df))
        extracted: list[Column] = []
        for native_expr, name in pairs:
            struct_dtype = cast("Struct", schema[name])
            extracted += [
                native_expr.getField(field.name).alias(field.name)
                for field in struct_dtype.fields
            ]
        return extracted

    def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]:
        schema = df.schema
        names: list[str] = []
        for name in compliant._evaluate_output_names(df):
            struct_dtype = cast("Struct", schema[name])
            names += [field.name for field in struct_dtype.fields]
        return names

    return compliant.__class__(
        func,
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        version=compliant._version,
        implementation=compliant._implementation,
    )
29 changes: 29 additions & 0 deletions narwhals/expr_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,32 @@ def field(self, name: str) -> ExprT:
return self._expr._append_node(
ExprNode(ExprKind.ELEMENTWISE, "struct.field", name=name)
)

def unnest(self) -> ExprT:
    r"""Expand the struct column into individual fields as separate columns.

    Each field of the struct becomes a separate column in the result, named
    after the field.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame(
        ...     {"user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}]}
        ... )
        >>> df = nw.from_native(df_native)
        >>> df.select(nw.col("user").struct.unnest())
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  shape: (2, 2)   |
        |  ┌─────┬──────┐  |
        |  │ id  ┆ name │  |
        |  │ --- ┆ ---  │  |
        |  │ i64 ┆ str  │  |
        |  ╞═════╪══════╡  |
        |  │ 0   ┆ john │  |
        |  │ 1   ┆ jane │  |
        |  └─────┴──────┘  |
        └──────────────────┘
    """
    return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "struct.unnest"))
Loading
Loading