diff --git a/narwhals/expr.py b/narwhals/expr.py index 9b6f616f71..0315fd0846 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1279,9 +1279,17 @@ def over( partition_by: Names of columns to compute window expression over. Must be names of columns, as opposed to expressions - so, this is a bit less flexible than Polars' `Expr.over`. + If not specified, the expression is computed over the entire frame + (i.e., no grouping is applied). order_by: Column(s) to order window functions by. For lazy backends, this argument is required when `over` is applied to order-dependent functions, see [order-dependence](../concepts/order_dependence.md). + When `order_by` is specified, the expression is evaluated on the frame + sorted by the given column(s); where applicable, the results are then + returned in the original row order. + + Note: + At least one of `partition_by` or `order_by` must be provided. Examples: >>> import pandas as pd @@ -1298,18 +1306,32 @@ def over( |2 4 y 4| └────────────────────────┘ - Cumulative operations are also supported, but (currently) only for - pandas and Polars: - - >>> df.with_columns(a_cum_sum_per_group=nw.col("a").cum_sum().over("b")) - ┌────────────────────────────┐ - | Narwhals DataFrame | - |----------------------------| - | a b a_cum_sum_per_group| - |0 1 x 1| - |1 2 x 3| - |2 4 y 4| - └────────────────────────────┘ + When `partition_by` is omitted, the expression is computed over the + entire frame. 
This is useful with `order_by` for order-dependent + operations without grouping: + + + >>> import duckdb + >>> import pyarrow as pa + >>> import narwhals as nw + >>> + >>> data = {"a": [3, 1, 2], "b": ["x", "y", "z"]} + >>> _table = pa.table(data) + >>> df = nw.from_native(duckdb.table("_table")) + >>> expr = nw.col("a").cum_sum().over(order_by="a") + >>> df.with_columns(a_cum_sum=expr).sort("a") + ┌───────────────────────────────┐ + | Narwhals LazyFrame | + |-------------------------------| + |┌───────┬─────────┬───────────┐| + |│ a │ b │ a_cum_sum │| + |│ int64 │ varchar │ int128 │| + |├───────┼─────────┼───────────┤| + |│ 1 │ y │ 1 │| + |│ 2 │ z │ 3 │| + |│ 3 │ x │ 6 │| + |└───────┴─────────┴───────────┘| + └───────────────────────────────┘ """ flat_partition_by = flatten(partition_by) flat_order_by = [order_by] if isinstance(order_by, str) else (order_by or [])