From 26ddf93fccf127c6f000a34ee4a987d9e05894c8 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Tue, 17 Mar 2026 09:32:59 +0100 Subject: [PATCH 01/11] fix(extensions)!: change distribution option to enum arg for std_dev and variance BREAKING CHANGE: changes the function signature of existing functions std_dev and variance Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 16 ++++---- tests/cases/arithmetic/std_dev.test | 53 ++++++++++++++++++++++++ tests/cases/arithmetic/variance.test | 61 ++++++++++++++++++++++++++++ tests/coverage/nodes.py | 23 ++++++++++- 4 files changed, 143 insertions(+), 10 deletions(-) create mode 100644 tests/cases/arithmetic/std_dev.test create mode 100644 tests/cases/arithmetic/variance.test diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 8746f0392..7f03eed96 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1395,21 +1395,21 @@ aggregate_functions: - args: - name: x value: fp32 + - name: distribution + options: [ SAMPLE, POPULATION] options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] nullability: DECLARED_OUTPUT return: fp32? - args: - name: x value: fp64 + - name: distribution + options: [ SAMPLE, POPULATION] options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] nullability: DECLARED_OUTPUT return: fp64? - name: "variance" @@ -1418,21 +1418,21 @@ aggregate_functions: - args: - name: x value: fp32 + - name: distribution + options: [ SAMPLE, POPULATION] options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] nullability: DECLARED_OUTPUT return: fp32? - args: - name: x value: fp64 + - name: distribution + options: [ SAMPLE, POPULATION] options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] nullability: DECLARED_OUTPUT return: fp64? - name: "corr" diff --git a/tests/cases/arithmetic/std_dev.test b/tests/cases/arithmetic/std_dev.test new file mode 100644 index 000000000..076fb36c6 --- /dev/null +++ b/tests/cases/arithmetic/std_dev.test @@ -0,0 +1,53 @@ +### SUBSTRAIT_AGGREGATE_TEST: v1.0 +### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' + +# basic: Basic examples without any special cases +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32 +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64 +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, POPULATION::enum) = 1.4142135::fp32 +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.4142135623730951::fp64 + +# uniform_values: Standard deviation of uniform values +((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 0.0::fp32 +((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64 + +# single_value: Standard deviation with single value +((42.0)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32 +((42.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64 + +# negative_values: Standard deviation with negative values +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 3.8944404::fp32 +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 3.8944404818493075::fp64 +((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp32, POPULATION::enum) = 7.0710678::fp32 +((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp64, POPULATION::enum) = 7.0710678118654755::fp64 + +# decimal_precision: Standard deviation with decimal values +((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32 +((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64 +((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(col0::fp64, POPULATION::enum) = 0.14142135623730953::fp64 + +# large_values: Standard deviation with large values +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp32, SAMPLE::enum) = 1581.1388::fp32 +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp64, SAMPLE::enum) = 1581.1388300841898::fp64 + +# small_values: Standard deviation with small values +((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, SAMPLE::enum) = 0.0015811388300841896::fp64 +((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, POPULATION::enum) = 0.0014142135623730951::fp64 + +# null_handling: Examples with null as input or output +((Null, Null, Null)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32 +(()) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32 +((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 2.0::fp32 +((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.632993161855452::fp64 + +# rounding: Examples with different rounding modes +((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 1.7406897::fp32 +((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 1.7406897166664838::fp64 + +# two_values: Standard deviation with two values +((10.0, 20.0)) std_dev(col0::fp32, SAMPLE::enum) = 7.071068::fp32 +((10.0, 20.0)) std_dev(col0::fp64, POPULATION::enum) = 5.0::fp64 + +# mixed_range: Standard deviation with mixed range values +((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp32, SAMPLE::enum) = 41.010193::fp32 +((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp64, POPULATION::enum) = 36.66060555964672::fp64 \ No newline at end of file diff --git a/tests/cases/arithmetic/variance.test b/tests/cases/arithmetic/variance.test new file mode 100644 index 000000000..47fac986a --- /dev/null +++ b/tests/cases/arithmetic/variance.test @@ -0,0 +1,61 @@ +### SUBSTRAIT_AGGREGATE_TEST: v1.0 +### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' + +# basic: Basic examples without any special cases +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32 +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64 +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, POPULATION::enum) = 2.0::fp32 +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64 + +# uniform_values: Variance of uniform values +((5.0, 5.0, 5.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 0.0::fp32 +((5.0, 5.0, 5.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64 + +# single_value: Variance with single value +((42.0)) variance(col0::fp32, SAMPLE::enum) = Null::fp32 +((42.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64 + +# negative_values: Variance with negative values +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 15.166667::fp32 +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 15.166666666666666::fp64 +((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp32, POPULATION::enum) = 50.0::fp32 +((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp64, POPULATION::enum) = 50.0::fp64 + +# decimal_precision: Variance with decimal values +((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32 +((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64 +((0.1, 0.2, 0.3, 0.4, 0.5)) variance(col0::fp64, POPULATION::enum) = 0.020000000000000004::fp64 + +# large_values: Variance with large values +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp32, SAMPLE::enum) = 2500000.0::fp32 +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp64, SAMPLE::enum) = 2500000.0::fp64 + +# small_values: Variance with small values +((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, SAMPLE::enum) = 0.0000025::fp64 +((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, POPULATION::enum) = 0.000002::fp64 + +# null_handling: Examples with null as input or output +((Null, Null, Null)) variance(col0::fp32, SAMPLE::enum) = Null::fp32 +(()) variance(col0::fp32, SAMPLE::enum) = Null::fp32 +((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp32, SAMPLE::enum) = 4.0::fp32 +((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.666666666666667::fp64 + +# rounding: Examples with different rounding modes +((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 3.03::fp32 +((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 3.0299999999999994::fp64 + +# two_values: Variance with two values +((10.0, 20.0)) variance(col0::fp32, SAMPLE::enum) = 50.0::fp32 +((10.0, 20.0)) variance(col0::fp64, POPULATION::enum) = 25.0::fp64 + +# mixed_range: Variance with mixed range values +((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp32, SAMPLE::enum) = 1681.25::fp32 +((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp64, POPULATION::enum) = 1345.0::fp64 + +# zero_mean: Variance with values around zero +((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32 +((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64 + +# three_values: Variance with three values +((10.0, 20.0, 30.0)) variance(col0::fp32, SAMPLE::enum) = 100.0::fp32 +((10.0, 20.0, 30.0)) variance(col0::fp64, POPULATION::enum) = 66.66666666666667::fp64 \ No newline at end of file diff --git a/tests/coverage/nodes.py b/tests/coverage/nodes.py index bee4be838..d1151bd31 100644 --- a/tests/coverage/nodes.py +++ b/tests/coverage/nodes.py @@ -85,10 +85,29 @@ def is_return_type_error(self): return isinstance(self.result, SubstraitError) def get_arg_types(self): - return [arg.get_base_type() for arg in self.args] + types = [] + for arg in self.args: + if isinstance(arg, CaseLiteral): + types.append(arg.get_base_type()) + elif isinstance(arg, AggregateArgument): + # For aggregate arguments, use column_type if available, otherwise extract from scalar_value + if arg.column_type: + types.append(arg.column_type) + elif arg.scalar_value: + types.append(arg.scalar_value.get_base_type()) + return types def get_signature(self): - return f"{self.func_name}({', '.join([arg.type for arg in self.args])}) = {self.get_return_type()}" + arg_types = [] + for arg in self.args: + if isinstance(arg, CaseLiteral): + arg_types.append(arg.type) + elif isinstance(arg, AggregateArgument): + if arg.column_type: + arg_types.append(arg.column_type) + elif arg.scalar_value: + arg_types.append(arg.scalar_value.type) + return f"{self.func_name}({', '.join(arg_types)}) = {self.get_return_type()}" @dataclass From 9b5f945bcd8c93a9cf9e46658eff08b1087e2b15 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Thu, 19 Mar 2026 08:54:34 +0100 Subject: [PATCH 02/11] fix: add deprecated flag and deprecate old sigs Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 44 ++++++++++++++++++++++++++ text/simple_extensions_schema.yaml | 47 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 7f03eed96..f442fb123 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1392,6 +1392,17 @@ aggregate_functions: - name: "std_dev" description: Calculates standard-deviation for a set of values. impls: + - deprecated: true + args: + - name: x + value: fp32 + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + distribution: + values: [ SAMPLE, POPULATION] + nullability: DECLARED_OUTPUT + return: fp32? - args: - name: x value: fp32 @@ -1402,6 +1413,17 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp32? + - deprecated: true + args: + - name: x + value: fp64 + options: + distribution: + values: [ SAMPLE, POPULATION] + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? - args: - name: x value: fp64 @@ -1415,6 +1437,17 @@ aggregate_functions: - name: "variance" description: Calculates variance for a set of values. impls: + - deprecated: true + args: + - name: x + value: fp32 + options: + distribution: + values: [ SAMPLE, POPULATION] + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp32? - args: - name: x value: fp32 @@ -1425,6 +1458,17 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp32? + - deprecated: true + args: + - name: x + value: fp64 + options: + distribution: + values: [ SAMPLE, POPULATION] + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? - args: - name: x value: fp64 diff --git a/text/simple_extensions_schema.yaml b/text/simple_extensions_schema.yaml index dd737246d..44db6bf6a 100644 --- a/text/simple_extensions_schema.yaml +++ b/text/simple_extensions_schema.yaml @@ -41,6 +41,11 @@ properties: $ref: "#/$defs/type_param_defs" variadic: # when set, last parameter may be specified one or more times type: boolean + deprecated: + type: boolean + description: >- + true if this type has been deprecated, otherwise false (default) + default: false type_variations: type: array minItems: 1 @@ -60,6 +65,12 @@ properties: functions: type: string enum: [INHERITS, SEPARATE] + deprecated: + type: boolean + description: >- + true if this type variation has been deprecated, + otherwise false (default) + default: false scalar_functions: type: array items: @@ -273,6 +284,18 @@ $defs: $ref: "#/$defs/returnValue" implementation: $ref: "#/$defs/implementation" + deprecated: + type: boolean + description: >- + true if this scalar function signature has been deprecated, + otherwise false (default) + default: false + deprecated: + type: boolean + description: >- + true if this scalar function has been deprecated, + otherwise false (default) + default: false aggregateFunction: type: object additionalProperties: false @@ -322,6 +345,18 @@ $defs: $ref: "#/$defs/maxset" decomposable: $ref: "#/$defs/decomposable" + deprecated: + type: boolean + description: >- + true if this aggregate function signature has been deprecated, + otherwise false (default) + default: false + deprecated: + type: boolean + description: >- + true if this aggregate function has been deprecated, + otherwise false (default) + default: false windowFunction: type: object additionalProperties: false @@ -374,3 +409,15 @@ $defs: window_type: type: string enum: [STREAMING, PARTITION] + deprecated: + type: boolean + description: >- + true if this window function signature has been deprecated, + otherwise false (default) + default: false + deprecated: + type: boolean + description: >- + true if this window function has been deprecated, + otherwise false (default) + default: false From eed9e419a5a06126969dcc779a255c70d3547dd4 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Thu, 19 Mar 2026 10:07:32 +0100 Subject: [PATCH 03/11] doc: add deprecated field to docs Signed-off-by: Niels Pardon --- site/docs/expressions/scalar_functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/docs/expressions/scalar_functions.md b/site/docs/expressions/scalar_functions.md index 212a32df8..6d6dd7410 100644 --- a/site/docs/expressions/scalar_functions.md +++ b/site/docs/expressions/scalar_functions.md @@ -12,6 +12,7 @@ A function is a scalar function if that function takes in values from a single r | Nullability Handling | Describes how nullability of input arguments maps to nullability of output arguments. Three options are: `MIRROR`, `DECLARED_OUTPUT` and `DISCRETE`. More details about nullability handling are listed below. | Optional, defaults to `MIRROR` | | Description | Additional description of function for implementers or users. Should be written human-readable to allow exposure to end users. Can be specified at the function level and/or on individual implementations to document overload-specific behavior. | Optional | | Return Value | The output type of the expression. Return types can be expressed as a fully-defined type or a type expression. See below for more on type expressions. | Required | +| deprecated | Indicates whether this function or function signature has been deprecated. | Optional, defaults to false | | Implementation Map | A map of implementation locations for one or more implementations of the given function. Each key is a function implementation type. Implementation types include examples such as: AthenaArrowLambda, TrinoV361Jar, ArrowCppKernelEnum, GandivaEnum, LinkedIn Transport Jar, etc. [Definition TBD]. Implementation type has one or more properties associated with retrieval of that implementation. | Optional | From 02dd601fd020425de2a130436e0db8c8708a4342 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Fri, 20 Mar 2026 20:24:12 +0100 Subject: [PATCH 04/11] fix: use new deprecation mechanism Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 8 +++- site/docs/expressions/scalar_functions.md | 1 - text/simple_extensions_schema.yaml | 47 ----------------------- 3 files changed, 6 insertions(+), 50 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index f442fb123..1cd189ce3 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1392,7 +1392,9 @@ aggregate_functions: - name: "std_dev" description: Calculates standard-deviation for a set of values. impls: - - deprecated: true + - deprecated: + since: "0.86.0" + reason: Use implementation with distribution enum argument instead. args: - name: x value: fp32 @@ -1413,7 +1415,9 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp32? - - deprecated: true + - deprecated: + since: "0.86.0" + reason: Use implementation with distribution enum argument instead. args: - name: x value: fp64 diff --git a/site/docs/expressions/scalar_functions.md b/site/docs/expressions/scalar_functions.md index 6d6dd7410..212a32df8 100644 --- a/site/docs/expressions/scalar_functions.md +++ b/site/docs/expressions/scalar_functions.md @@ -12,7 +12,6 @@ A function is a scalar function if that function takes in values from a single r | Nullability Handling | Describes how nullability of input arguments maps to nullability of output arguments. Three options are: `MIRROR`, `DECLARED_OUTPUT` and `DISCRETE`. More details about nullability handling are listed below. | Optional, defaults to `MIRROR` | | Description | Additional description of function for implementers or users. Should be written human-readable to allow exposure to end users. Can be specified at the function level and/or on individual implementations to document overload-specific behavior. | Optional | | Return Value | The output type of the expression. Return types can be expressed as a fully-defined type or a type expression. See below for more on type expressions. | Required | -| deprecated | Indicates whether this function or function signature has been deprecated. | Optional, defaults to false | | Implementation Map | A map of implementation locations for one or more implementations of the given function. Each key is a function implementation type. Implementation types include examples such as: AthenaArrowLambda, TrinoV361Jar, ArrowCppKernelEnum, GandivaEnum, LinkedIn Transport Jar, etc. [Definition TBD]. Implementation type has one or more properties associated with retrieval of that implementation. | Optional | diff --git a/text/simple_extensions_schema.yaml b/text/simple_extensions_schema.yaml index 44db6bf6a..dd737246d 100644 --- a/text/simple_extensions_schema.yaml +++ b/text/simple_extensions_schema.yaml @@ -41,11 +41,6 @@ properties: $ref: "#/$defs/type_param_defs" variadic: # when set, last parameter may be specified one or more times type: boolean - deprecated: - type: boolean - description: >- - true if this type has been deprecated, otherwise false (default) - default: false type_variations: type: array minItems: 1 @@ -65,12 +60,6 @@ properties: functions: type: string enum: [INHERITS, SEPARATE] - deprecated: - type: boolean - description: >- - true if this type variation has been deprecated, - otherwise false (default) - default: false scalar_functions: type: array items: @@ -284,18 +273,6 @@ $defs: $ref: "#/$defs/returnValue" implementation: $ref: "#/$defs/implementation" - deprecated: - type: boolean - description: >- - true if this scalar function signature has been deprecated, - otherwise false (default) - default: false - deprecated: - type: boolean - description: >- - true if this scalar function has been deprecated, - otherwise false (default) - default: false aggregateFunction: type: object additionalProperties: false @@ -345,18 +322,6 @@ $defs: $ref: "#/$defs/maxset" decomposable: $ref: "#/$defs/decomposable" - deprecated: - type: boolean - description: >- - true if this aggregate function signature has been deprecated, - otherwise false (default) - default: false - deprecated: - type: boolean - description: >- - true if this aggregate function has been deprecated, - otherwise false (default) - default: false windowFunction: type: object additionalProperties: false @@ -409,15 +374,3 @@ $defs: window_type: type: string enum: [STREAMING, PARTITION] - deprecated: - type: boolean - description: >- - true if this window function signature has been deprecated, - otherwise false (default) - default: false - deprecated: - type: boolean - description: >- - true if this window function has been deprecated, - otherwise false (default) - default: false From 8e570e20a3ee2df4fe6f61bd86b4ebaee890223b Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Fri, 20 Mar 2026 20:30:50 +0100 Subject: [PATCH 05/11] fix: also change variance function to new deprecation Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 1cd189ce3..ec5f762fb 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1441,7 +1441,9 @@ aggregate_functions: - name: "variance" description: Calculates variance for a set of values. impls: - - deprecated: true + - deprecated: + since: "0.86.0" + reason: Use implementation with distribution enum argument instead. args: - name: x value: fp32 @@ -1462,7 +1464,9 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp32? - - deprecated: true + - deprecated: + since: "0.86.0" + reason: Use implementation with distribution enum argument instead. args: - name: x value: fp64 From 1330e33a596e48a45eac1676178bbf7f6ee19a17 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Fri, 20 Mar 2026 20:41:09 +0100 Subject: [PATCH 06/11] fix: test case nullability Signed-off-by: Niels Pardon --- tests/cases/arithmetic/std_dev.test | 58 ++++++++++++------------ tests/cases/arithmetic/variance.test | 66 ++++++++++++++-------------- 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/tests/cases/arithmetic/std_dev.test b/tests/cases/arithmetic/std_dev.test index 076fb36c6..2a5310651 100644 --- a/tests/cases/arithmetic/std_dev.test +++ b/tests/cases/arithmetic/std_dev.test @@ -2,52 +2,52 @@ ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' # basic: Basic examples without any special cases -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32 -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64 -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, POPULATION::enum) = 1.4142135::fp32 -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.4142135623730951::fp64 +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, POPULATION::enum) = 1.4142135::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.4142135623730951::fp64? # uniform_values: Standard deviation of uniform values -((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 0.0::fp32 -((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64 +((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 0.0::fp32? +((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64? # single_value: Standard deviation with single value -((42.0)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32 -((42.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64 +((42.0)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32? +((42.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64? # negative_values: Standard deviation with negative values -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 3.8944404::fp32 -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 3.8944404818493075::fp64 -((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp32, POPULATION::enum) = 7.0710678::fp32 -((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp64, POPULATION::enum) = 7.0710678118654755::fp64 +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 3.8944404::fp32? +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 3.8944404818493075::fp64? +((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp32, POPULATION::enum) = 7.0710678::fp32? +((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp64, POPULATION::enum) = 7.0710678118654755::fp64? # decimal_precision: Standard deviation with decimal values -((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32 -((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64 -((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(col0::fp64, POPULATION::enum) = 0.14142135623730953::fp64 +((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32? +((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64? +((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(col0::fp64, POPULATION::enum) = 0.14142135623730953::fp64? # large_values: Standard deviation with large values -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp32, SAMPLE::enum) = 1581.1388::fp32 -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp64, SAMPLE::enum) = 1581.1388300841898::fp64 +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp32, SAMPLE::enum) = 1581.1388::fp32? +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp64, SAMPLE::enum) = 1581.1388300841898::fp64? # small_values: Standard deviation with small values -((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, SAMPLE::enum) = 0.0015811388300841896::fp64 -((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, POPULATION::enum) = 0.0014142135623730951::fp64 +((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, SAMPLE::enum) = 0.0015811388300841896::fp64? +((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, POPULATION::enum) = 0.0014142135623730951::fp64? # null_handling: Examples with null as input or output -((Null, Null, Null)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32 -(()) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32 -((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 2.0::fp32 -((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.632993161855452::fp64 +((Null, Null, Null)) std_dev(col0::fp32?, SAMPLE::enum) = Null::fp32? +(()) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32? +((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp32?, SAMPLE::enum) = 2.0::fp32? +((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp64?, POPULATION::enum) = 1.632993161855452::fp64? # rounding: Examples with different rounding modes -((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 1.7406897::fp32 -((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 1.7406897166664838::fp64 +((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 1.7406897::fp32? +((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 1.7406897166664838::fp64? # two_values: Standard deviation with two values -((10.0, 20.0)) std_dev(col0::fp32, SAMPLE::enum) = 7.071068::fp32 -((10.0, 20.0)) std_dev(col0::fp64, POPULATION::enum) = 5.0::fp64 +((10.0, 20.0)) std_dev(col0::fp32, SAMPLE::enum) = 7.071068::fp32? +((10.0, 20.0)) std_dev(col0::fp64, POPULATION::enum) = 5.0::fp64? # mixed_range: Standard deviation with mixed range values -((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp32, SAMPLE::enum) = 41.010193::fp32 -((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp64, POPULATION::enum) = 36.66060555964672::fp64 \ No newline at end of file +((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp32, SAMPLE::enum) = 41.010193::fp32? +((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp64, POPULATION::enum) = 36.66060555964672::fp64? diff --git a/tests/cases/arithmetic/variance.test b/tests/cases/arithmetic/variance.test index 47fac986a..49f5ed063 100644 --- a/tests/cases/arithmetic/variance.test +++ b/tests/cases/arithmetic/variance.test @@ -2,60 +2,60 @@ ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' # basic: Basic examples without any special cases -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32 -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64 -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, POPULATION::enum) = 2.0::fp32 -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64 +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, POPULATION::enum) = 2.0::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64? # uniform_values: Variance of uniform values -((5.0, 5.0, 5.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 0.0::fp32 -((5.0, 5.0, 5.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64 +((5.0, 5.0, 5.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 0.0::fp32? +((5.0, 5.0, 5.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64? # single_value: Variance with single value -((42.0)) variance(col0::fp32, SAMPLE::enum) = Null::fp32 -((42.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64 +((42.0)) variance(col0::fp32, SAMPLE::enum) = Null::fp32? +((42.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64? # negative_values: Variance with negative values -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 15.166667::fp32 -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 15.166666666666666::fp64 -((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp32, POPULATION::enum) = 50.0::fp32 -((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp64, POPULATION::enum) = 50.0::fp64 +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 15.166667::fp32? +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 15.166666666666666::fp64? +((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp32, POPULATION::enum) = 50.0::fp32? +((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp64, POPULATION::enum) = 50.0::fp64? # decimal_precision: Variance with decimal values -((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32 -((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64 -((0.1, 0.2, 0.3, 0.4, 0.5)) variance(col0::fp64, POPULATION::enum) = 0.020000000000000004::fp64 +((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32? +((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64? +((0.1, 0.2, 0.3, 0.4, 0.5)) variance(col0::fp64, POPULATION::enum) = 0.020000000000000004::fp64? # large_values: Variance with large values -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp32, SAMPLE::enum) = 2500000.0::fp32 -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp64, SAMPLE::enum) = 2500000.0::fp64 +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp32, SAMPLE::enum) = 2500000.0::fp32? +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp64, SAMPLE::enum) = 2500000.0::fp64? # small_values: Variance with small values -((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, SAMPLE::enum) = 0.0000025::fp64 -((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, POPULATION::enum) = 0.000002::fp64 +((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, SAMPLE::enum) = 0.0000025::fp64? +((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, POPULATION::enum) = 0.000002::fp64? # null_handling: Examples with null as input or output -((Null, Null, Null)) variance(col0::fp32, SAMPLE::enum) = Null::fp32 -(()) variance(col0::fp32, SAMPLE::enum) = Null::fp32 -((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp32, SAMPLE::enum) = 4.0::fp32 -((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.666666666666667::fp64 +((Null, Null, Null)) variance(col0::fp32?, SAMPLE::enum) = Null::fp32? +(()) variance(col0::fp32, SAMPLE::enum) = Null::fp32? +((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp32?, SAMPLE::enum) = 4.0::fp32? +((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp64?, POPULATION::enum) = 2.666666666666667::fp64? # rounding: Examples with different rounding modes -((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 3.03::fp32 -((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 3.0299999999999994::fp64 +((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 3.03::fp32? +((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 3.0299999999999994::fp64? # two_values: Variance with two values -((10.0, 20.0)) variance(col0::fp32, SAMPLE::enum) = 50.0::fp32 -((10.0, 20.0)) variance(col0::fp64, POPULATION::enum) = 25.0::fp64 +((10.0, 20.0)) variance(col0::fp32, SAMPLE::enum) = 50.0::fp32? +((10.0, 20.0)) variance(col0::fp64, POPULATION::enum) = 25.0::fp64? # mixed_range: Variance with mixed range values -((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp32, SAMPLE::enum) = 1681.25::fp32 -((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp64, POPULATION::enum) = 1345.0::fp64 +((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp32, SAMPLE::enum) = 1681.25::fp32? +((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp64, POPULATION::enum) = 1345.0::fp64? # zero_mean: Variance with values around zero -((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32 -((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64 +((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32? +((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64? # three_values: Variance with three values -((10.0, 20.0, 30.0)) variance(col0::fp32, SAMPLE::enum) = 100.0::fp32 -((10.0, 20.0, 30.0)) variance(col0::fp64, POPULATION::enum) = 66.66666666666667::fp64 \ No newline at end of file +((10.0, 20.0, 30.0)) variance(col0::fp32, SAMPLE::enum) = 100.0::fp32? +((10.0, 20.0, 30.0)) variance(col0::fp64, POPULATION::enum) = 66.66666666666667::fp64? From b364972e5e94ef7fdafe717c467350f9718db800 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Mon, 23 Mar 2026 18:51:13 +0100 Subject: [PATCH 07/11] fix: update deprecation version Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index ec5f762fb..c9a598050 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1393,7 +1393,7 @@ aggregate_functions: description: Calculates standard-deviation for a set of values. impls: - deprecated: - since: "0.86.0" + since: "0.87.0" reason: Use implementation with distribution enum argument instead. args: - name: x @@ -1416,7 +1416,7 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp32? - deprecated: - since: "0.86.0" + since: "0.87.0" reason: Use implementation with distribution enum argument instead. args: - name: x @@ -1442,7 +1442,7 @@ aggregate_functions: description: Calculates variance for a set of values. impls: - deprecated: - since: "0.86.0" + since: "0.87.0" reason: Use implementation with distribution enum argument instead. args: - name: x @@ -1465,7 +1465,7 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp32? - deprecated: - since: "0.86.0" + since: "0.87.0" reason: Use implementation with distribution enum argument instead. args: - name: x From 6275d2318b2d456014e13a81d14a84c88d202a5b Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Mon, 23 Mar 2026 19:04:26 +0100 Subject: [PATCH 08/11] fix: remove deprecated field Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index c9a598050..f261b8b3f 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1392,10 +1392,7 @@ aggregate_functions: - name: "std_dev" description: Calculates standard-deviation for a set of values. impls: - - deprecated: - since: "0.87.0" - reason: Use implementation with distribution enum argument instead. - args: + - args: - name: x value: fp32 options: @@ -1415,10 +1412,7 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp32? - - deprecated: - since: "0.87.0" - reason: Use implementation with distribution enum argument instead. - args: + - args: - name: x value: fp64 options: @@ -1441,10 +1435,7 @@ aggregate_functions: - name: "variance" description: Calculates variance for a set of values. impls: - - deprecated: - since: "0.87.0" - reason: Use implementation with distribution enum argument instead. - args: + - args: - name: x value: fp32 options: @@ -1464,10 +1455,7 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp32? - - deprecated: - since: "0.87.0" - reason: Use implementation with distribution enum argument instead. - args: + - args: - name: x value: fp64 options: From 8175a7300e6bb896a3ac861ef52e5f3a90bd6a80 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Tue, 24 Mar 2026 09:33:36 +0100 Subject: [PATCH 09/11] fix: reverse argument order Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 16 +++---- tests/cases/arithmetic/std_dev.test | 58 ++++++++++++------------ tests/cases/arithmetic/variance.test | 66 ++++++++++++++-------------- 3 files changed, 70 insertions(+), 70 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index f261b8b3f..079f8fa98 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1403,10 +1403,10 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp32? - args: - - name: x - value: fp32 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: fp32 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] @@ -1423,10 +1423,10 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: fp64 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: fp64 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] @@ -1446,10 +1446,10 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp32? - args: - - name: x - value: fp32 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: fp32 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] @@ -1466,10 +1466,10 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: fp64 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: fp64 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] diff --git a/tests/cases/arithmetic/std_dev.test b/tests/cases/arithmetic/std_dev.test index 2a5310651..abbd57e35 100644 --- a/tests/cases/arithmetic/std_dev.test +++ b/tests/cases/arithmetic/std_dev.test @@ -2,52 +2,52 @@ ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' # basic: Basic examples without any special cases -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32? -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64? -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, POPULATION::enum) = 1.4142135::fp32? -((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.4142135623730951::fp64? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(SAMPLE::enum, col0::fp32) = 1.5811388::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(SAMPLE::enum, col0::fp64) = 1.5811388300841898::fp64? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(POPULATION::enum, col0::fp32) = 1.4142135::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(POPULATION::enum, col0::fp64) = 1.4142135623730951::fp64? # uniform_values: Standard deviation of uniform values -((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 0.0::fp32? -((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64? +((5.0, 5.0, 5.0, 5.0)) std_dev(SAMPLE::enum, col0::fp32) = 0.0::fp32? +((5.0, 5.0, 5.0, 5.0)) std_dev(POPULATION::enum, col0::fp64) = 0.0::fp64? # single_value: Standard deviation with single value -((42.0)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32? -((42.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64? +((42.0)) std_dev(SAMPLE::enum, col0::fp32) = Null::fp32? +((42.0)) std_dev(POPULATION::enum, col0::fp64) = 0.0::fp64? # negative_values: Standard deviation with negative values -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 3.8944404::fp32? -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 3.8944404818493075::fp64? -((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp32, POPULATION::enum) = 7.0710678::fp32? -((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp64, POPULATION::enum) = 7.0710678118654755::fp64? +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(SAMPLE::enum, col0::fp32) = 3.8944404::fp32? +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(SAMPLE::enum, col0::fp64) = 3.8944404818493075::fp64? +((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(POPULATION::enum, col0::fp32) = 7.0710678::fp32? +((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(POPULATION::enum, col0::fp64) = 7.0710678118654755::fp64? # decimal_precision: Standard deviation with decimal values -((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32? -((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64? -((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(col0::fp64, POPULATION::enum) = 0.14142135623730953::fp64? +((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(SAMPLE::enum, col0::fp32) = 1.5811388::fp32? +((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(SAMPLE::enum, col0::fp64) = 1.5811388300841898::fp64? +((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(POPULATION::enum, col0::fp64) = 0.14142135623730953::fp64? # large_values: Standard deviation with large values -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp32, SAMPLE::enum) = 1581.1388::fp32? -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp64, SAMPLE::enum) = 1581.1388300841898::fp64? +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(SAMPLE::enum, col0::fp32) = 1581.1388::fp32? +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(SAMPLE::enum, col0::fp64) = 1581.1388300841898::fp64? # small_values: Standard deviation with small values -((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, SAMPLE::enum) = 0.0015811388300841896::fp64? -((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, POPULATION::enum) = 0.0014142135623730951::fp64? +((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(SAMPLE::enum, col0::fp64) = 0.0015811388300841896::fp64? +((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(POPULATION::enum, col0::fp64) = 0.0014142135623730951::fp64? # null_handling: Examples with null as input or output -((Null, Null, Null)) std_dev(col0::fp32?, SAMPLE::enum) = Null::fp32? -(()) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32? -((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp32?, SAMPLE::enum) = 2.0::fp32? -((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp64?, POPULATION::enum) = 1.632993161855452::fp64? +((Null, Null, Null)) std_dev(SAMPLE::enum, col0::fp32?) = Null::fp32? +(()) std_dev(SAMPLE::enum, col0::fp32) = Null::fp32? +((1.0, Null, 3.0, Null, 5.0)) std_dev(SAMPLE::enum, col0::fp32?) = 2.0::fp32? +((1.0, Null, 3.0, Null, 5.0)) std_dev(POPULATION::enum, col0::fp64?) = 1.632993161855452::fp64? # rounding: Examples with different rounding modes -((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 1.7406897::fp32? -((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 1.7406897166664838::fp64? +((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(SAMPLE::enum, col0::fp32) [rounding:TIE_TO_EVEN] = 1.7406897::fp32? +((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(SAMPLE::enum, col0::fp64) [rounding:TRUNCATE] = 1.7406897166664838::fp64? # two_values: Standard deviation with two values -((10.0, 20.0)) std_dev(col0::fp32, SAMPLE::enum) = 7.071068::fp32? -((10.0, 20.0)) std_dev(col0::fp64, POPULATION::enum) = 5.0::fp64? +((10.0, 20.0)) std_dev(SAMPLE::enum, col0::fp32) = 7.071068::fp32? +((10.0, 20.0)) std_dev(POPULATION::enum, col0::fp64) = 5.0::fp64? # mixed_range: Standard deviation with mixed range values -((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp32, SAMPLE::enum) = 41.010193::fp32? -((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp64, POPULATION::enum) = 36.66060555964672::fp64? +((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(SAMPLE::enum, col0::fp32) = 41.010193::fp32? +((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(POPULATION::enum, col0::fp64) = 36.66060555964672::fp64? diff --git a/tests/cases/arithmetic/variance.test b/tests/cases/arithmetic/variance.test index 49f5ed063..d2cc50eef 100644 --- a/tests/cases/arithmetic/variance.test +++ b/tests/cases/arithmetic/variance.test @@ -2,60 +2,60 @@ ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' # basic: Basic examples without any special cases -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32? -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64? -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, POPULATION::enum) = 2.0::fp32? -((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(SAMPLE::enum, col0::fp32) = 2.5::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(SAMPLE::enum, col0::fp64) = 2.5::fp64? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(POPULATION::enum, col0::fp32) = 2.0::fp32? +((1.0, 2.0, 3.0, 4.0, 5.0)) variance(POPULATION::enum, col0::fp64) = 2.0::fp64? # uniform_values: Variance of uniform values -((5.0, 5.0, 5.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 0.0::fp32? -((5.0, 5.0, 5.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64? +((5.0, 5.0, 5.0, 5.0)) variance(SAMPLE::enum, col0::fp32) = 0.0::fp32? +((5.0, 5.0, 5.0, 5.0)) variance(POPULATION::enum, col0::fp64) = 0.0::fp64? # single_value: Variance with single value -((42.0)) variance(col0::fp32, SAMPLE::enum) = Null::fp32? -((42.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64? +((42.0)) variance(SAMPLE::enum, col0::fp32) = Null::fp32? +((42.0)) variance(POPULATION::enum, col0::fp64) = 0.0::fp64? # negative_values: Variance with negative values -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 15.166667::fp32? -((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 15.166666666666666::fp64? -((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp32, POPULATION::enum) = 50.0::fp32? -((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp64, POPULATION::enum) = 50.0::fp64? +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(SAMPLE::enum, col0::fp32) = 15.166667::fp32? +((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(SAMPLE::enum, col0::fp64) = 15.166666666666666::fp64? +((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(POPULATION::enum, col0::fp32) = 50.0::fp32? +((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(POPULATION::enum, col0::fp64) = 50.0::fp64? # decimal_precision: Variance with decimal values -((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32? -((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64? -((0.1, 0.2, 0.3, 0.4, 0.5)) variance(col0::fp64, POPULATION::enum) = 0.020000000000000004::fp64? +((1.5, 2.5, 3.5, 4.5, 5.5)) variance(SAMPLE::enum, col0::fp32) = 2.5::fp32? +((1.5, 2.5, 3.5, 4.5, 5.5)) variance(SAMPLE::enum, col0::fp64) = 2.5::fp64? +((0.1, 0.2, 0.3, 0.4, 0.5)) variance(POPULATION::enum, col0::fp64) = 0.020000000000000004::fp64? # large_values: Variance with large values -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp32, SAMPLE::enum) = 2500000.0::fp32? -((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp64, SAMPLE::enum) = 2500000.0::fp64? +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(SAMPLE::enum, col0::fp32) = 2500000.0::fp32? +((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(SAMPLE::enum, col0::fp64) = 2500000.0::fp64? # small_values: Variance with small values -((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, SAMPLE::enum) = 0.0000025::fp64? -((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, POPULATION::enum) = 0.000002::fp64? +((0.001, 0.002, 0.003, 0.004, 0.005)) variance(SAMPLE::enum, col0::fp64) = 0.0000025::fp64? +((0.001, 0.002, 0.003, 0.004, 0.005)) variance(POPULATION::enum, col0::fp64) = 0.000002::fp64? # null_handling: Examples with null as input or output -((Null, Null, Null)) variance(col0::fp32?, SAMPLE::enum) = Null::fp32? -(()) variance(col0::fp32, SAMPLE::enum) = Null::fp32? -((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp32?, SAMPLE::enum) = 4.0::fp32? -((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp64?, POPULATION::enum) = 2.666666666666667::fp64? +((Null, Null, Null)) variance(SAMPLE::enum, col0::fp32?) = Null::fp32? +(()) variance(SAMPLE::enum, col0::fp32) = Null::fp32? +((1.0, Null, 3.0, Null, 5.0)) variance(SAMPLE::enum, col0::fp32?) = 4.0::fp32? +((1.0, Null, 3.0, Null, 5.0)) variance(POPULATION::enum, col0::fp64?) = 2.666666666666667::fp64? # rounding: Examples with different rounding modes -((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 3.03::fp32? -((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 3.0299999999999994::fp64? +((1.1, 2.2, 3.3, 4.4, 5.5)) variance(SAMPLE::enum, col0::fp32) [rounding:TIE_TO_EVEN] = 3.03::fp32? +((1.1, 2.2, 3.3, 4.4, 5.5)) variance(SAMPLE::enum, col0::fp64) [rounding:TRUNCATE] = 3.0299999999999994::fp64? # two_values: Variance with two values -((10.0, 20.0)) variance(col0::fp32, SAMPLE::enum) = 50.0::fp32? -((10.0, 20.0)) variance(col0::fp64, POPULATION::enum) = 25.0::fp64? +((10.0, 20.0)) variance(SAMPLE::enum, col0::fp32) = 50.0::fp32? +((10.0, 20.0)) variance(POPULATION::enum, col0::fp64) = 25.0::fp64? # mixed_range: Variance with mixed range values -((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp32, SAMPLE::enum) = 1681.25::fp32? -((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp64, POPULATION::enum) = 1345.0::fp64? +((0.0, 100.0, 50.0, 25.0, 75.0)) variance(SAMPLE::enum, col0::fp32) = 1681.25::fp32? +((0.0, 100.0, 50.0, 25.0, 75.0)) variance(POPULATION::enum, col0::fp64) = 1345.0::fp64? # zero_mean: Variance with values around zero -((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32? -((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64? +((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(SAMPLE::enum, col0::fp32) = 2.5::fp32? +((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(POPULATION::enum, col0::fp64) = 2.0::fp64? # three_values: Variance with three values -((10.0, 20.0, 30.0)) variance(col0::fp32, SAMPLE::enum) = 100.0::fp32? -((10.0, 20.0, 30.0)) variance(col0::fp64, POPULATION::enum) = 66.66666666666667::fp64? +((10.0, 20.0, 30.0)) variance(SAMPLE::enum, col0::fp32) = 100.0::fp32? +((10.0, 20.0, 30.0)) variance(POPULATION::enum, col0::fp64) = 66.66666666666667::fp64? From 2911777b471ef7e75217e9b6670843e6d65e41bd Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Tue, 17 Mar 2026 13:31:48 +0100 Subject: [PATCH 10/11] feat(extensions): support int arguments with std_dev and variance functions Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 80 ++++++++++++++++++++++++++++ tests/cases/arithmetic/std_dev.test | 27 ++++++++++ tests/cases/arithmetic/variance.test | 35 ++++++++++++ 3 files changed, 142 insertions(+) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 079f8fa98..fbeadd78c 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1432,6 +1432,46 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? + - args: + - name: x + value: i8 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? + - args: + - name: x + value: i16 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? + - args: + - name: x + value: i32 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? + - args: + - name: x + value: i64 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? - name: "variance" description: Calculates variance for a set of values. impls: @@ -1475,6 +1515,46 @@ aggregate_functions: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? + - args: + - name: x + value: i8 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? + - args: + - name: x + value: i16 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? + - args: + - name: x + value: i32 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? + - args: + - name: x + value: i64 + - name: distribution + options: [ SAMPLE, POPULATION] + options: + rounding: + values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + nullability: DECLARED_OUTPUT + return: fp64? - name: "corr" description: > Calculates the value of Pearson's correlation coefficient between `x` and `y`. diff --git a/tests/cases/arithmetic/std_dev.test b/tests/cases/arithmetic/std_dev.test index abbd57e35..f951555a6 100644 --- a/tests/cases/arithmetic/std_dev.test +++ b/tests/cases/arithmetic/std_dev.test @@ -51,3 +51,30 @@ # mixed_range: Standard deviation with mixed range values ((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(SAMPLE::enum, col0::fp32) = 41.010193::fp32? ((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(POPULATION::enum, col0::fp64) = 36.66060555964672::fp64? + + +# integer_types: Standard deviation with integer input types returning fp64 +((1, 2, 3, 4, 5)) std_dev(col0::i8, SAMPLE::enum) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i8, POPULATION::enum) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i16, SAMPLE::enum) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i16, POPULATION::enum) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i32, SAMPLE::enum) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i32, POPULATION::enum) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i64, SAMPLE::enum) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(col0::i64, POPULATION::enum) = 1.4142135623730951::fp64? + +# integer_negative: Standard deviation with negative integer values +((-5, -3, -1, 1, 3, 5)) std_dev(col0::i32, SAMPLE::enum) = 3.8944404818493075::fp64? +((-10, -5, 0, 5, 10)) std_dev(col0::i64, POPULATION::enum) = 7.0710678118654755::fp64? + +# integer_uniform: Standard deviation with uniform integer values +((42, 42, 42, 42)) std_dev(col0::i16, SAMPLE::enum) = 0.0::fp64? +((100, 100, 100, 100)) std_dev(col0::i32, POPULATION::enum) = 0.0::fp64? + +# integer_single: Standard deviation with single integer value +((7)) std_dev(col0::i8, SAMPLE::enum) = Null::fp64? +((99)) std_dev(col0::i64, POPULATION::enum) = 0.0::fp64? + +# integer_large_values: Standard deviation with large integer values +((1000, 2000, 3000, 4000, 5000)) std_dev(col0::i32, SAMPLE::enum) = 1581.1388300841898::fp64? +((100, 200, 300, 400, 500)) std_dev(col0::i64, POPULATION::enum) = 141.4213562373095::fp64? diff --git a/tests/cases/arithmetic/variance.test b/tests/cases/arithmetic/variance.test index d2cc50eef..626752cd5 100644 --- a/tests/cases/arithmetic/variance.test +++ b/tests/cases/arithmetic/variance.test @@ -59,3 +59,38 @@ # three_values: Variance with three values ((10.0, 20.0, 30.0)) variance(SAMPLE::enum, col0::fp32) = 100.0::fp32? ((10.0, 20.0, 30.0)) variance(POPULATION::enum, col0::fp64) = 66.66666666666667::fp64? + + +# integer_types: Variance with integer input types returning fp64 +((1, 2, 3, 4, 5)) variance(col0::i8, SAMPLE::enum) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(col0::i8, POPULATION::enum) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(col0::i16, SAMPLE::enum) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(col0::i16, POPULATION::enum) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(col0::i32, SAMPLE::enum) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(col0::i32, POPULATION::enum) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(col0::i64, SAMPLE::enum) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(col0::i64, POPULATION::enum) = 2.0::fp64? + +# integer_negative: Variance with negative integer values +((-5, -3, -1, 1, 3, 5)) variance(col0::i32, SAMPLE::enum) = 15.166666666666666::fp64? +((-10, -5, 0, 5, 10)) variance(col0::i64, POPULATION::enum) = 50.0::fp64? + +# integer_uniform: Variance with uniform integer values +((42, 42, 42, 42)) variance(col0::i16, SAMPLE::enum) = 0.0::fp64? +((100, 100, 100, 100)) variance(col0::i32, POPULATION::enum) = 0.0::fp64? + +# integer_single: Variance with single integer value +((7)) variance(col0::i8, SAMPLE::enum) = Null::fp64? +((99)) variance(col0::i64, POPULATION::enum) = 0.0::fp64? + +# integer_large_values: Variance with large integer values +((1000, 2000, 3000, 4000, 5000)) variance(col0::i32, SAMPLE::enum) = 2500000.0::fp64? +((100, 200, 300, 400, 500)) variance(col0::i64, POPULATION::enum) = 20000.0::fp64? + +# integer_zero_mean: Variance with integer values around zero +((-2, -1, 0, 1, 2)) variance(col0::i16, SAMPLE::enum) = 2.5::fp64? +((-2, -1, 0, 1, 2)) variance(col0::i64, POPULATION::enum) = 2.0::fp64? + +# integer_three_values: Variance with three integer values +((10, 20, 30)) variance(col0::i32, SAMPLE::enum) = 100.0::fp64? +((10, 20, 30)) variance(col0::i8, POPULATION::enum) = 66.66666666666667::fp64? From a9b2d92f11e39e7f83dfa4ea429ed6c5466e18a3 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Tue, 24 Mar 2026 09:35:52 +0100 Subject: [PATCH 11/11] fix: reverse argument order Signed-off-by: Niels Pardon --- extensions/functions_arithmetic.yaml | 32 +++++++++++----------- tests/cases/arithmetic/std_dev.test | 32 +++++++++++----------- tests/cases/arithmetic/variance.test | 40 ++++++++++++++-------------- 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index fbeadd78c..ae4429a78 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -1433,40 +1433,40 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i8 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i8 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i16 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i16 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i32 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i32 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i64 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i64 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] @@ -1516,40 +1516,40 @@ aggregate_functions: nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i8 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i8 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i16 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i16 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i32 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i32 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] nullability: DECLARED_OUTPUT return: fp64? - args: - - name: x - value: i64 - name: distribution options: [ SAMPLE, POPULATION] + - name: x + value: i64 options: rounding: values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] diff --git a/tests/cases/arithmetic/std_dev.test b/tests/cases/arithmetic/std_dev.test index f951555a6..d50d86a61 100644 --- a/tests/cases/arithmetic/std_dev.test +++ b/tests/cases/arithmetic/std_dev.test @@ -54,27 +54,27 @@ # integer_types: Standard deviation with integer input types returning fp64 -((1, 2, 3, 4, 5)) std_dev(col0::i8, SAMPLE::enum) = 1.5811388300841898::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i8, POPULATION::enum) = 1.4142135623730951::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i16, SAMPLE::enum) = 1.5811388300841898::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i16, POPULATION::enum) = 1.4142135623730951::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i32, SAMPLE::enum) = 1.5811388300841898::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i32, POPULATION::enum) = 1.4142135623730951::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i64, SAMPLE::enum) = 1.5811388300841898::fp64? -((1, 2, 3, 4, 5)) std_dev(col0::i64, POPULATION::enum) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(SAMPLE::enum, col0::i8) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(POPULATION::enum, col0::i8) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(SAMPLE::enum, col0::i16) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(POPULATION::enum, col0::i16) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(SAMPLE::enum, col0::i32) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(POPULATION::enum, col0::i32) = 1.4142135623730951::fp64? +((1, 2, 3, 4, 5)) std_dev(SAMPLE::enum, col0::i64) = 1.5811388300841898::fp64? +((1, 2, 3, 4, 5)) std_dev(POPULATION::enum, col0::i64) = 1.4142135623730951::fp64? # integer_negative: Standard deviation with negative integer values -((-5, -3, -1, 1, 3, 5)) std_dev(col0::i32, SAMPLE::enum) = 3.8944404818493075::fp64? -((-10, -5, 0, 5, 10)) std_dev(col0::i64, POPULATION::enum) = 7.0710678118654755::fp64? +((-5, -3, -1, 1, 3, 5)) std_dev(SAMPLE::enum, col0::i32) = 3.8944404818493075::fp64? +((-10, -5, 0, 5, 10)) std_dev(POPULATION::enum, col0::i64) = 7.0710678118654755::fp64? # integer_uniform: Standard deviation with uniform integer values -((42, 42, 42, 42)) std_dev(col0::i16, SAMPLE::enum) = 0.0::fp64? -((100, 100, 100, 100)) std_dev(col0::i32, POPULATION::enum) = 0.0::fp64? +((42, 42, 42, 42)) std_dev(SAMPLE::enum, col0::i16) = 0.0::fp64? +((100, 100, 100, 100)) std_dev(POPULATION::enum, col0::i32) = 0.0::fp64? # integer_single: Standard deviation with single integer value -((7)) std_dev(col0::i8, SAMPLE::enum) = Null::fp64? -((99)) std_dev(col0::i64, POPULATION::enum) = 0.0::fp64? +((7)) std_dev(SAMPLE::enum, col0::i8) = Null::fp64? +((99)) std_dev(POPULATION::enum, col0::i64) = 0.0::fp64? # integer_large_values: Standard deviation with large integer values -((1000, 2000, 3000, 4000, 5000)) std_dev(col0::i32, SAMPLE::enum) = 1581.1388300841898::fp64? -((100, 200, 300, 400, 500)) std_dev(col0::i64, POPULATION::enum) = 141.4213562373095::fp64? +((1000, 2000, 3000, 4000, 5000)) std_dev(SAMPLE::enum, col0::i32) = 1581.1388300841898::fp64? +((100, 200, 300, 400, 500)) std_dev(POPULATION::enum, col0::i64) = 141.4213562373095::fp64? diff --git a/tests/cases/arithmetic/variance.test b/tests/cases/arithmetic/variance.test index 626752cd5..5a6cc6022 100644 --- a/tests/cases/arithmetic/variance.test +++ b/tests/cases/arithmetic/variance.test @@ -62,35 +62,35 @@ # integer_types: Variance with integer input types returning fp64 -((1, 2, 3, 4, 5)) variance(col0::i8, SAMPLE::enum) = 2.5::fp64? -((1, 2, 3, 4, 5)) variance(col0::i8, POPULATION::enum) = 2.0::fp64? -((1, 2, 3, 4, 5)) variance(col0::i16, SAMPLE::enum) = 2.5::fp64? -((1, 2, 3, 4, 5)) variance(col0::i16, POPULATION::enum) = 2.0::fp64? -((1, 2, 3, 4, 5)) variance(col0::i32, SAMPLE::enum) = 2.5::fp64? -((1, 2, 3, 4, 5)) variance(col0::i32, POPULATION::enum) = 2.0::fp64? -((1, 2, 3, 4, 5)) variance(col0::i64, SAMPLE::enum) = 2.5::fp64? -((1, 2, 3, 4, 5)) variance(col0::i64, POPULATION::enum) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(SAMPLE::enum, col0::i8) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(POPULATION::enum, col0::i8) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(SAMPLE::enum, col0::i16) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(POPULATION::enum, col0::i16) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(SAMPLE::enum, col0::i32) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(POPULATION::enum, col0::i32) = 2.0::fp64? +((1, 2, 3, 4, 5)) variance(SAMPLE::enum, col0::i64) = 2.5::fp64? +((1, 2, 3, 4, 5)) variance(POPULATION::enum, col0::i64) = 2.0::fp64? # integer_negative: Variance with negative integer values -((-5, -3, -1, 1, 3, 5)) variance(col0::i32, SAMPLE::enum) = 15.166666666666666::fp64? -((-10, -5, 0, 5, 10)) variance(col0::i64, POPULATION::enum) = 50.0::fp64? +((-5, -3, -1, 1, 3, 5)) variance(SAMPLE::enum, col0::i32) = 15.166666666666666::fp64? +((-10, -5, 0, 5, 10)) variance(POPULATION::enum, col0::i64) = 50.0::fp64? # integer_uniform: Variance with uniform integer values -((42, 42, 42, 42)) variance(col0::i16, SAMPLE::enum) = 0.0::fp64? -((100, 100, 100, 100)) variance(col0::i32, POPULATION::enum) = 0.0::fp64? +((42, 42, 42, 42)) variance(SAMPLE::enum, col0::i16) = 0.0::fp64? +((100, 100, 100, 100)) variance(POPULATION::enum, col0::i32) = 0.0::fp64? # integer_single: Variance with single integer value -((7)) variance(col0::i8, SAMPLE::enum) = Null::fp64? -((99)) variance(col0::i64, POPULATION::enum) = 0.0::fp64? +((7)) variance(SAMPLE::enum, col0::i8) = Null::fp64? +((99)) variance(POPULATION::enum, col0::i64) = 0.0::fp64? # integer_large_values: Variance with large integer values -((1000, 2000, 3000, 4000, 5000)) variance(col0::i32, SAMPLE::enum) = 2500000.0::fp64? -((100, 200, 300, 400, 500)) variance(col0::i64, POPULATION::enum) = 20000.0::fp64? +((1000, 2000, 3000, 4000, 5000)) variance(SAMPLE::enum, col0::i32) = 2500000.0::fp64? +((100, 200, 300, 400, 500)) variance(POPULATION::enum, col0::i64) = 20000.0::fp64? # integer_zero_mean: Variance with integer values around zero -((-2, -1, 0, 1, 2)) variance(col0::i16, SAMPLE::enum) = 2.5::fp64? -((-2, -1, 0, 1, 2)) variance(col0::i64, POPULATION::enum) = 2.0::fp64? +((-2, -1, 0, 1, 2)) variance(SAMPLE::enum, col0::i16) = 2.5::fp64? +((-2, -1, 0, 1, 2)) variance(POPULATION::enum, col0::i64) = 2.0::fp64? # integer_three_values: Variance with three integer values -((10, 20, 30)) variance(col0::i32, SAMPLE::enum) = 100.0::fp64? -((10, 20, 30)) variance(col0::i8, POPULATION::enum) = 66.66666666666667::fp64? +((10, 20, 30)) variance(SAMPLE::enum, col0::i32) = 100.0::fp64? +((10, 20, 30)) variance(POPULATION::enum, col0::i8) = 66.66666666666667::fp64?