Skip to content
Merged
48 changes: 44 additions & 4 deletions extensions/functions_arithmetic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1404,12 +1404,32 @@ aggregate_functions:
return: fp32?
- args:
- name: x
value: fp64
value: fp32
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
Comment thread
nielspardon marked this conversation as resolved.
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: x
value: fp64
options:
distribution:
values: [ SAMPLE, POPULATION]
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- args:
- name: x
value: fp64
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- name: "variance"
Expand All @@ -1419,20 +1439,40 @@ aggregate_functions:
- name: x
value: fp32
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
distribution:
values: [ SAMPLE, POPULATION]
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: x
value: fp64
value: fp32
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: x
value: fp64
options:
distribution:
values: [ SAMPLE, POPULATION]
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- args:
- name: x
value: fp64
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- name: "corr"
Expand Down
53 changes: 53 additions & 0 deletions tests/cases/arithmetic/std_dev.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
### SUBSTRAIT_AGGREGATE_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'

# basic: Basic examples without any special cases
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64?
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp32, POPULATION::enum) = 1.4142135::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 1.4142135623730951::fp64?

# uniform_values: Standard deviation of uniform values
((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 0.0::fp32?
((5.0, 5.0, 5.0, 5.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64?

# single_value: Standard deviation with single value
((42.0)) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32?
((42.0)) std_dev(col0::fp64, POPULATION::enum) = 0.0::fp64?

# negative_values: Standard deviation with negative values
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp32, SAMPLE::enum) = 3.7416575::fp32?
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(col0::fp64, SAMPLE::enum) = 3.7416573867739413::fp64?
((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp32, POPULATION::enum) = 7.0710678::fp32?
((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(col0::fp64, POPULATION::enum) = 7.0710678118654755::fp64?

# decimal_precision: Standard deviation with decimal values
((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp32, SAMPLE::enum) = 1.5811388::fp32?
((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(col0::fp64, SAMPLE::enum) = 1.5811388300841898::fp64?
((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(col0::fp64, POPULATION::enum) = 0.14142135623730953::fp64?

# large_values: Standard deviation with large values
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp32, SAMPLE::enum) = 1581.1388::fp32?
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(col0::fp64, SAMPLE::enum) = 1581.1388300841898::fp64?

# small_values: Standard deviation with small values
((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, SAMPLE::enum) = 0.0015811388300841896::fp64?
((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(col0::fp64, POPULATION::enum) = 0.0014142135623730951::fp64?

# null_handling: Examples with null as input or output
((Null, Null, Null)) std_dev(col0::fp32?, SAMPLE::enum) = Null::fp32?
(()) std_dev(col0::fp32, SAMPLE::enum) = Null::fp32?
((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp32?, SAMPLE::enum) = 2.0::fp32?
((1.0, Null, 3.0, Null, 5.0)) std_dev(col0::fp64?, POPULATION::enum) = 1.632993161855452::fp64?

# rounding: Examples with different rounding modes
((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 1.7392527::fp32?
((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 1.7392527130926086::fp64?

# two_values: Standard deviation with two values
((10.0, 20.0)) std_dev(col0::fp32, SAMPLE::enum) = 7.071068::fp32?
((10.0, 20.0)) std_dev(col0::fp64, POPULATION::enum) = 5.0::fp64?

# mixed_range: Standard deviation with mixed range values
((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp32, SAMPLE::enum) = 39.52847::fp32?
((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(col0::fp64, POPULATION::enum) = 35.35533905932738::fp64?
61 changes: 61 additions & 0 deletions tests/cases/arithmetic/variance.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
### SUBSTRAIT_AGGREGATE_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'

# basic: Basic examples without any special cases
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64?
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp32, POPULATION::enum) = 2.0::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64?

# uniform_values: Variance of uniform values
((5.0, 5.0, 5.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 0.0::fp32?
((5.0, 5.0, 5.0, 5.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64?

# single_value: Variance with single value
((42.0)) variance(col0::fp32, SAMPLE::enum) = Null::fp32?
((42.0)) variance(col0::fp64, POPULATION::enum) = 0.0::fp64?

# negative_values: Variance with negative values
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp32, SAMPLE::enum) = 14.0::fp32?
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(col0::fp64, SAMPLE::enum) = 14.0::fp64?
((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp32, POPULATION::enum) = 50.0::fp32?
((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(col0::fp64, POPULATION::enum) = 50.0::fp64?

# decimal_precision: Variance with decimal values
((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32?
((1.5, 2.5, 3.5, 4.5, 5.5)) variance(col0::fp64, SAMPLE::enum) = 2.5::fp64?
((0.1, 0.2, 0.3, 0.4, 0.5)) variance(col0::fp64, POPULATION::enum) = 0.020000000000000004::fp64?

# large_values: Variance with large values
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp32, SAMPLE::enum) = 2500000.0::fp32?
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(col0::fp64, SAMPLE::enum) = 2500000.0::fp64?

# small_values: Variance with small values
((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, SAMPLE::enum) = 0.0000025::fp64?
((0.001, 0.002, 0.003, 0.004, 0.005)) variance(col0::fp64, POPULATION::enum) = 0.000002::fp64?

# null_handling: Examples with null as input or output
((Null, Null, Null)) variance(col0::fp32?, SAMPLE::enum) = Null::fp32?
(()) variance(col0::fp32, SAMPLE::enum) = Null::fp32?
((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp32?, SAMPLE::enum) = 4.0::fp32?
((1.0, Null, 3.0, Null, 5.0)) variance(col0::fp64?, POPULATION::enum) = 2.666666666666667::fp64?

# rounding: Examples with different rounding modes
((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp32, SAMPLE::enum) [rounding:TIE_TO_EVEN] = 3.025::fp32?
((1.1, 2.2, 3.3, 4.4, 5.5)) variance(col0::fp64, SAMPLE::enum) [rounding:TRUNCATE] = 3.025::fp64?

# two_values: Variance with two values
((10.0, 20.0)) variance(col0::fp32, SAMPLE::enum) = 50.0::fp32?
((10.0, 20.0)) variance(col0::fp64, POPULATION::enum) = 25.0::fp64?

# mixed_range: Variance with mixed range values
((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp32, SAMPLE::enum) = 1562.5::fp32?
((0.0, 100.0, 50.0, 25.0, 75.0)) variance(col0::fp64, POPULATION::enum) = 1250.0::fp64?

# zero_mean: Variance with values around zero
((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp32, SAMPLE::enum) = 2.5::fp32?
((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(col0::fp64, POPULATION::enum) = 2.0::fp64?

# three_values: Variance with three values
((10.0, 20.0, 30.0)) variance(col0::fp32, SAMPLE::enum) = 100.0::fp32?
((10.0, 20.0, 30.0)) variance(col0::fp64, POPULATION::enum) = 66.66666666666667::fp64?
23 changes: 21 additions & 2 deletions tests/coverage/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,29 @@ def is_return_type_error(self):
return isinstance(self.result, SubstraitError)

def get_arg_types(self):
    """Return the list of argument types for this test case.

    Each entry of ``self.args`` is handled according to its class:

    * ``CaseLiteral`` -- contributes ``arg.get_base_type()``.
    * ``AggregateArgument`` -- contributes ``arg.column_type`` when it is
      truthy, otherwise ``arg.scalar_value.get_base_type()`` when a scalar
      value is present.

    Arguments of any other class, and aggregate arguments that have neither
    a column type nor a scalar value, contribute nothing, so the result may
    be shorter than ``self.args``.
    """
    types = []
    for arg in self.args:
        if isinstance(arg, CaseLiteral):
            types.append(arg.get_base_type())
        elif isinstance(arg, AggregateArgument):
            # Prefer the declared column type; only fall back to the scalar
            # literal when no column type was recorded.
            # NOTE(review): column_type is appended unchanged, while literals
            # go through get_base_type() -- confirm column_type never carries
            # a nullability marker or type parameters.
            if arg.column_type:
                types.append(arg.column_type)
            elif arg.scalar_value:
                types.append(arg.scalar_value.get_base_type())
    return types

def get_signature(self):
    """Return a printable signature ``"<func>(<arg types>) = <return type>"``.

    Argument types are collected from ``self.args``:

    * ``CaseLiteral`` -- contributes ``arg.type``.
    * ``AggregateArgument`` -- contributes ``arg.column_type`` when it is
      truthy, otherwise ``arg.scalar_value.type`` when a scalar value is
      present.

    Arguments of any other class, and aggregate arguments that have neither
    a column type nor a scalar value, are left out of the signature. The
    return type comes from ``self.get_return_type()``.
    """
    arg_types = []
    for arg in self.args:
        if isinstance(arg, CaseLiteral):
            arg_types.append(arg.type)
        elif isinstance(arg, AggregateArgument):
            # Prefer the declared column type; only fall back to the scalar
            # literal when no column type was recorded.
            if arg.column_type:
                arg_types.append(arg.column_type)
            elif arg.scalar_value:
                arg_types.append(arg.scalar_value.type)
    return f"{self.func_name}({', '.join(arg_types)}) = {self.get_return_type()}"


@dataclass
Expand Down
Loading