Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions extensions/functions_arithmetic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1403,13 +1403,33 @@ aggregate_functions:
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
Comment thread
nielspardon marked this conversation as resolved.
- name: x
value: fp64
value: fp32
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: x
value: fp64
options:
distribution:
values: [ SAMPLE, POPULATION]
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- args:
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
- name: x
value: fp64
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- name: "variance"
Expand All @@ -1419,20 +1439,40 @@ aggregate_functions:
- name: x
value: fp32
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
distribution:
values: [ SAMPLE, POPULATION]
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
- name: x
value: fp64
value: fp32
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp32?
- args:
- name: x
value: fp64
options:
distribution:
values: [ SAMPLE, POPULATION]
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- args:
- name: distribution
options: [ SAMPLE, POPULATION]
Comment thread
nielspardon marked this conversation as resolved.
- name: x
value: fp64
options:
rounding:
values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
nullability: DECLARED_OUTPUT
return: fp64?
- name: "corr"
Expand Down
53 changes: 53 additions & 0 deletions tests/cases/arithmetic/std_dev.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
### SUBSTRAIT_AGGREGATE_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'

# basic: Basic examples without any special cases
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(SAMPLE::enum, col0::fp32) = 1.5811388::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(SAMPLE::enum, col0::fp64) = 1.5811388300841898::fp64?
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(POPULATION::enum, col0::fp32) = 1.4142135::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) std_dev(POPULATION::enum, col0::fp64) = 1.4142135623730951::fp64?

# uniform_values: Standard deviation of uniform values
((5.0, 5.0, 5.0, 5.0)) std_dev(SAMPLE::enum, col0::fp32) = 0.0::fp32?
((5.0, 5.0, 5.0, 5.0)) std_dev(POPULATION::enum, col0::fp64) = 0.0::fp64?

# single_value: Standard deviation with single value
((42.0)) std_dev(SAMPLE::enum, col0::fp32) = Null::fp32?
((42.0)) std_dev(POPULATION::enum, col0::fp64) = 0.0::fp64?

# negative_values: Standard deviation with negative values
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(SAMPLE::enum, col0::fp32) = 3.7416575::fp32?
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) std_dev(SAMPLE::enum, col0::fp64) = 3.7416573867739413::fp64?
((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(POPULATION::enum, col0::fp32) = 7.0710678::fp32?
((-10.0, -5.0, 0.0, 5.0, 10.0)) std_dev(POPULATION::enum, col0::fp64) = 7.0710678118654755::fp64?

# decimal_precision: Standard deviation with decimal values
((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(SAMPLE::enum, col0::fp32) = 1.5811388::fp32?
((1.5, 2.5, 3.5, 4.5, 5.5)) std_dev(SAMPLE::enum, col0::fp64) = 1.5811388300841898::fp64?
((0.1, 0.2, 0.3, 0.4, 0.5)) std_dev(POPULATION::enum, col0::fp64) = 0.14142135623730953::fp64?

# large_values: Standard deviation with large values
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(SAMPLE::enum, col0::fp32) = 1581.1388::fp32?
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) std_dev(SAMPLE::enum, col0::fp64) = 1581.1388300841898::fp64?

# small_values: Standard deviation with small values
((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(SAMPLE::enum, col0::fp64) = 0.0015811388300841896::fp64?
((0.001, 0.002, 0.003, 0.004, 0.005)) std_dev(POPULATION::enum, col0::fp64) = 0.0014142135623730951::fp64?

# null_handling: Examples with null as input or output
((Null, Null, Null)) std_dev(SAMPLE::enum, col0::fp32?) = Null::fp32?
(()) std_dev(SAMPLE::enum, col0::fp32) = Null::fp32?
((1.0, Null, 3.0, Null, 5.0)) std_dev(SAMPLE::enum, col0::fp32?) = 2.0::fp32?
((1.0, Null, 3.0, Null, 5.0)) std_dev(POPULATION::enum, col0::fp64?) = 1.632993161855452::fp64?

# rounding: Examples with different rounding modes
((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(SAMPLE::enum, col0::fp32) [rounding:TIE_TO_EVEN] = 1.7392527::fp32?
((1.1, 2.2, 3.3, 4.4, 5.5)) std_dev(SAMPLE::enum, col0::fp64) [rounding:TRUNCATE] = 1.7392527130926086::fp64?

# two_values: Standard deviation with two values
((10.0, 20.0)) std_dev(SAMPLE::enum, col0::fp32) = 7.071068::fp32?
((10.0, 20.0)) std_dev(POPULATION::enum, col0::fp64) = 5.0::fp64?

# mixed_range: Standard deviation with mixed range values
((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(SAMPLE::enum, col0::fp32) = 39.52847::fp32?
((0.0, 100.0, 50.0, 25.0, 75.0)) std_dev(POPULATION::enum, col0::fp64) = 35.35533905932738::fp64?
61 changes: 61 additions & 0 deletions tests/cases/arithmetic/variance.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
### SUBSTRAIT_AGGREGATE_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'

# basic: Basic examples without any special cases
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(SAMPLE::enum, col0::fp32) = 2.5::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(SAMPLE::enum, col0::fp64) = 2.5::fp64?
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(POPULATION::enum, col0::fp32) = 2.0::fp32?
((1.0, 2.0, 3.0, 4.0, 5.0)) variance(POPULATION::enum, col0::fp64) = 2.0::fp64?

# uniform_values: Variance of uniform values
((5.0, 5.0, 5.0, 5.0)) variance(SAMPLE::enum, col0::fp32) = 0.0::fp32?
((5.0, 5.0, 5.0, 5.0)) variance(POPULATION::enum, col0::fp64) = 0.0::fp64?

# single_value: Variance with single value
((42.0)) variance(SAMPLE::enum, col0::fp32) = Null::fp32?
((42.0)) variance(POPULATION::enum, col0::fp64) = 0.0::fp64?

# negative_values: Variance with negative values
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(SAMPLE::enum, col0::fp32) = 14.0::fp32?
((-5.0, -3.0, -1.0, 1.0, 3.0, 5.0)) variance(SAMPLE::enum, col0::fp64) = 14.0::fp64?
((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(POPULATION::enum, col0::fp32) = 50.0::fp32?
((-10.0, -5.0, 0.0, 5.0, 10.0)) variance(POPULATION::enum, col0::fp64) = 50.0::fp64?

# decimal_precision: Variance with decimal values
((1.5, 2.5, 3.5, 4.5, 5.5)) variance(SAMPLE::enum, col0::fp32) = 2.5::fp32?
((1.5, 2.5, 3.5, 4.5, 5.5)) variance(SAMPLE::enum, col0::fp64) = 2.5::fp64?
((0.1, 0.2, 0.3, 0.4, 0.5)) variance(POPULATION::enum, col0::fp64) = 0.020000000000000004::fp64?

# large_values: Variance with large values
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(SAMPLE::enum, col0::fp32) = 2500000.0::fp32?
((1000.0, 2000.0, 3000.0, 4000.0, 5000.0)) variance(SAMPLE::enum, col0::fp64) = 2500000.0::fp64?

# small_values: Variance with small values
((0.001, 0.002, 0.003, 0.004, 0.005)) variance(SAMPLE::enum, col0::fp64) = 0.0000025::fp64?
((0.001, 0.002, 0.003, 0.004, 0.005)) variance(POPULATION::enum, col0::fp64) = 0.000002::fp64?

# null_handling: Examples with null as input or output
((Null, Null, Null)) variance(SAMPLE::enum, col0::fp32?) = Null::fp32?
(()) variance(SAMPLE::enum, col0::fp32) = Null::fp32?
((1.0, Null, 3.0, Null, 5.0)) variance(SAMPLE::enum, col0::fp32?) = 4.0::fp32?
((1.0, Null, 3.0, Null, 5.0)) variance(POPULATION::enum, col0::fp64?) = 2.666666666666667::fp64?

# rounding: Examples with different rounding modes
((1.1, 2.2, 3.3, 4.4, 5.5)) variance(SAMPLE::enum, col0::fp32) [rounding:TIE_TO_EVEN] = 3.025::fp32?
((1.1, 2.2, 3.3, 4.4, 5.5)) variance(SAMPLE::enum, col0::fp64) [rounding:TRUNCATE] = 3.025::fp64?

# two_values: Variance with two values
((10.0, 20.0)) variance(SAMPLE::enum, col0::fp32) = 50.0::fp32?
((10.0, 20.0)) variance(POPULATION::enum, col0::fp64) = 25.0::fp64?

# mixed_range: Variance with mixed range values
((0.0, 100.0, 50.0, 25.0, 75.0)) variance(SAMPLE::enum, col0::fp32) = 1562.5::fp32?
((0.0, 100.0, 50.0, 25.0, 75.0)) variance(POPULATION::enum, col0::fp64) = 1250.0::fp64?

# zero_mean: Variance with values around zero
((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(SAMPLE::enum, col0::fp32) = 2.5::fp32?
((-2.0, -1.0, 0.0, 1.0, 2.0)) variance(POPULATION::enum, col0::fp64) = 2.0::fp64?

# three_values: Variance with three values
((10.0, 20.0, 30.0)) variance(SAMPLE::enum, col0::fp32) = 100.0::fp32?
((10.0, 20.0, 30.0)) variance(POPULATION::enum, col0::fp64) = 66.66666666666667::fp64?
23 changes: 21 additions & 2 deletions tests/coverage/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,29 @@ def is_return_type_error(self):
return isinstance(self.result, SubstraitError)

def get_arg_types(self):
    """Return the type of each entry in ``self.args``, in argument order.

    ``CaseLiteral`` arguments contribute ``get_base_type()``.
    ``AggregateArgument`` arguments contribute ``column_type`` when it is
    truthy, otherwise ``scalar_value.get_base_type()`` when ``scalar_value``
    is truthy. Arguments that match neither class, and aggregate arguments
    with neither field set, contribute nothing to the result.
    """
    types = []
    for arg in self.args:
        if isinstance(arg, CaseLiteral):
            types.append(arg.get_base_type())
        elif isinstance(arg, AggregateArgument):
            # Prefer the column type; fall back to the scalar literal's type.
            if arg.column_type:
                types.append(arg.column_type)
            elif arg.scalar_value:
                types.append(arg.scalar_value.get_base_type())
    return types

def get_signature(self):
    """Return the signature string ``"<func_name>(<arg types>) = <return type>"``.

    Argument types are joined with ``", "``. ``CaseLiteral`` arguments
    contribute their ``type`` attribute. ``AggregateArgument`` arguments
    contribute ``column_type`` when it is truthy, otherwise
    ``scalar_value.type`` when ``scalar_value`` is truthy. Arguments that
    match neither class, and aggregate arguments with neither field set,
    are left out of the signature.
    """
    arg_types = []
    for arg in self.args:
        if isinstance(arg, CaseLiteral):
            arg_types.append(arg.type)
        elif isinstance(arg, AggregateArgument):
            # Prefer the column type; fall back to the scalar literal's type.
            if arg.column_type:
                arg_types.append(arg.column_type)
            elif arg.scalar_value:
                arg_types.append(arg.scalar_value.type)
    return f"{self.func_name}({', '.join(arg_types)}) = {self.get_return_type()}"


@dataclass
Expand Down
Loading