From a3bccfefa25a146156863afdcef4922c180aaa03 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 16:02:59 +0200 Subject: [PATCH 1/2] docs: Clarify .over(partition_by=None, order_by=...) --- narwhals/expr.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/narwhals/expr.py b/narwhals/expr.py index 9b6f616f71..48637cf3b7 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1279,9 +1279,17 @@ def over( partition_by: Names of columns to compute window expression over. Must be names of columns, as opposed to expressions - so, this is a bit less flexible than Polars' `Expr.over`. + If not specified, the expression is computed over the entire frame + (i.e., no grouping is applied). order_by: Column(s) to order window functions by. For lazy backends, this argument is required when `over` is applied to order-dependent functions, see [order-dependence](../concepts/order_dependence.md). + When `order_by` is specified, the expression is evaluated on the frame + sorted by the given column(s), and, if applicable, the results are + returned with the original row order preserved. + + Warning: + At least one of `partition_by` or `order_by` must be provided. Examples: >>> import pandas as pd @@ -1310,6 +1318,22 @@ def over( |1 2 x 3| |2 4 y 4| └────────────────────────────┘ + + When `partition_by` is omitted, the expression is computed over the + entire frame. This is useful with `order_by` for order-dependent + operations without grouping: + + >>> df_native = pd.DataFrame({"a": [3, 1, 2], "b": ["x", "y", "z"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_cum_sum=nw.col("a").cum_sum().over(order_by="a")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b a_cum_sum| + |0 3 x 6| + |1 1 y 1| + |2 2 z 3| + └──────────────────┘ """ flat_partition_by = flatten(partition_by) flat_order_by = [order_by] if isinstance(order_by, str) else (order_by or []) From 554a5165ec06d17e81ae5ac3f67416207193d148 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 17:47:33 +0200 Subject: [PATCH 2/2] Warning -> Note; remove outdated example; pandas -> duckdb --- narwhals/expr.py | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 48637cf3b7..0315fd0846 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1288,7 +1288,7 @@ def over( sorted by the given column(s), and, if applicable, the results are returned with the original row order preserved. - Warning: + Note: At least one of `partition_by` or `order_by` must be provided. Examples: @@ -1306,34 +1306,32 @@ def over( |2 4 y 4| └────────────────────────┘ - Cumulative operations are also supported, but (currently) only for - pandas and Polars: - - >>> df.with_columns(a_cum_sum_per_group=nw.col("a").cum_sum().over("b")) - ┌────────────────────────────┐ - | Narwhals DataFrame | - |----------------------------| - | a b a_cum_sum_per_group| - |0 1 x 1| - |1 2 x 3| - |2 4 y 4| - └────────────────────────────┘ - When `partition_by` is omitted, the expression is computed over the entire frame. This is useful with `order_by` for order-dependent operations without grouping: - >>> df_native = pd.DataFrame({"a": [3, 1, 2], "b": ["x", "y", "z"]}) - >>> df = nw.from_native(df_native) - >>> df.with_columns(a_cum_sum=nw.col("a").cum_sum().over(order_by="a")) - ┌──────────────────┐ - |Narwhals DataFrame| - |------------------| - | a b a_cum_sum| - |0 3 x 6| - |1 1 y 1| - |2 2 z 3| - └──────────────────┘ + + >>> import duckdb + >>> import pyarrow as pa + >>> import narwhals as nw + >>> + >>> data = {"a": [3, 1, 2], "b": ["x", "y", "z"]} + >>> _table = pa.table(data) + >>> df = nw.from_native(duckdb.table("_table")) + >>> expr = nw.col("a").cum_sum().over(order_by="a") + >>> df.with_columns(a_cum_sum=expr).sort("a") + ┌───────────────────────────────┐ + | Narwhals LazyFrame | + |-------------------------------| + |┌───────┬─────────┬───────────┐| + |│ a │ b │ a_cum_sum │| + |│ int64 │ varchar │ int128 │| + |├───────┼─────────┼───────────┤| + |│ 1 │ y │ 1 │| + |│ 2 │ z │ 3 │| + |│ 3 │ x │ 6 │| + |└───────┴─────────┴───────────┘| + └───────────────────────────────┘ """ flat_partition_by = flatten(partition_by) flat_order_by = [order_by] if isinstance(order_by, str) else (order_by or [])