Skip to content
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d19c87a
feat(extensions): add unsigned integer extension types (u8, u16, u32,…
kadinrabo Jan 29, 2026
a413f3e
feat(extensions): add arithmetic function impls for unsigned types
kadinrabo Jan 30, 2026
b4bd79b
feat(tests): add UDT argument support in test framework
kadinrabo Jan 30, 2026
719ddb5
chore: regenerate ANTLR parsers
kadinrabo Jan 30, 2026
37a1a40
feat(tests): add unsigned integer test cases
kadinrabo Jan 30, 2026
41cee40
chore: update test counts and baseline
kadinrabo Jan 30, 2026
10baf81
chore: add dependency on extension_types_numeric
kadinrabo Jan 30, 2026
078ddbc
qualify UDT references with dependency alias
kadinrabo Feb 11, 2026
0d4aa26
rename type file to unsigned_integers, update URN
kadinrabo Feb 11, 2026
8d37e9e
add string structure encoding for unsigned types
kadinrabo Feb 11, 2026
ec638bf
move unsigned functions into self-contained extension file
kadinrabo Feb 11, 2026
b7990a8
remove redundant hardcoded UDT type mappings
kadinrabo Feb 11, 2026
a14d6d7
remove unused dependency alias grammar changes
kadinrabo Feb 11, 2026
856e63d
split unsigned tests into separate files, fix test framework
kadinrabo Feb 11, 2026
dbd3c8e
Merge remote-tracking branch 'upstream/main' into feat/unsigned-exten…
kadinrabo Feb 27, 2026
62a4767
remove pycache from tracking
kadinrabo Feb 27, 2026
7f577ce
revert gitignore change
kadinrabo Feb 27, 2026
4bec245
add comment explaining extension file scanner change
kadinrabo Feb 27, 2026
215fcee
improve type descriptions and divide description
kadinrabo Mar 2, 2026
cd6b2a8
add overflow and null handling test cases
kadinrabo Mar 2, 2026
fce64b0
remove overflow option from unsigned divide
kadinrabo Mar 5, 2026
04a8882
remove modulus function and tests
kadinrabo Mar 5, 2026
7c4c54a
merge upstream/main, regenerate parser and baseline
kadinrabo Mar 6, 2026
3ac870f
merge upstream/main, fix UDT nullability and grammar ordering
kadinrabo Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
309 changes: 309 additions & 0 deletions extensions/unsigned_integers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
%YAML 1.2
---
urn: "extension:io.substrait:unsigned_integers"

# User-defined unsigned integer types: u8, u16, u32 and u64.
# Each type declares a structure with a single `value` field of type `str`;
# per the descriptions below, that string carries the decimal representation
# of the number.
types:
Comment thread
kadinrabo marked this conversation as resolved.
# u8: documented range 0 to 255.
- name: u8
description: >
Unsigned 8-bit integer (0 to 255).
Values are encoded as decimal strings in the structure representation.
structure:
value: str
# u16: documented range 0 to 65535.
- name: u16
description: >
Unsigned 16-bit integer (0 to 65535).
Values are encoded as decimal strings in the structure representation.
structure:
value: str
# u32: documented range 0 to 4294967295.
- name: u32
description: >
Unsigned 32-bit integer (0 to 4294967295).
Values are encoded as decimal strings in the structure representation.
structure:
value: str
# u64: documented range 0 to 18446744073709551615.
- name: u64
description: >
Unsigned 64-bit integer (0 to 18446744073709551615).
Values are encoded as decimal strings in the structure representation.
structure:
value: str

scalar_functions:
Comment thread
vbarua marked this conversation as resolved.
# add: one overload per unsigned width (u8, u16, u32, u64). Both arguments
# (x, y) and the return value use the same type within an overload, and every
# overload exposes an `overflow` option with the values SILENT, SATURATE, ERROR.
# NOTE(review): the `u!` prefix presumably refers to the user-defined types
# declared under `types:` in this file - confirm against the type-syntax docs.
-
name: "add"
description: "Add two unsigned integer values."
impls:
- args:
- name: x
value: u!u8
- name: y
value: u!u8
options:
overflow:
Comment thread
kadinrabo marked this conversation as resolved.
values: [ SILENT, SATURATE, ERROR ]
return: u!u8
- args:
- name: x
value: u!u16
- name: y
value: u!u16
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u16
- args:
- name: x
value: u!u32
- name: y
value: u!u32
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u32
- args:
- name: x
value: u!u64
- name: y
value: u!u64
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u64
# subtract: one overload per unsigned width (u8, u16, u32, u64). Both
# arguments (x, y) and the return value use the same type within an overload,
# and every overload exposes an `overflow` option (SILENT, SATURATE, ERROR).
# NOTE(review): the description does not say which argument is the minuend;
# presumably the result is x - y - confirm.
-
name: "subtract"
description: "Subtract one unsigned integer value from another."
impls:
- args:
- name: x
value: u!u8
- name: y
value: u!u8
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u8
- args:
- name: x
value: u!u16
- name: y
value: u!u16
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u16
- args:
- name: x
value: u!u32
- name: y
value: u!u32
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u32
- args:
- name: x
value: u!u64
- name: y
value: u!u64
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u64
# multiply: one overload per unsigned width (u8, u16, u32, u64). Both
# arguments (x, y) and the return value use the same type within an overload,
# and every overload exposes an `overflow` option (SILENT, SATURATE, ERROR).
-
name: "multiply"
description: "Multiply two unsigned integer values."
impls:
- args:
- name: x
value: u!u8
- name: y
value: u!u8
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u8
- args:
- name: x
value: u!u16
- name: y
value: u!u16
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u16
- args:
- name: x
value: u!u32
- name: y
value: u!u32
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u32
- args:
- name: x
value: u!u64
- name: y
value: u!u64
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
return: u!u64
# divide: one overload per unsigned width (u8, u16, u32, u64). Both arguments
# (x, y) and the return value use the same type within an overload.
# Unlike add/subtract/multiply there is no `overflow` option; each overload
# instead exposes `on_domain_error` and `on_division_by_zero`, both accepting
# NULL or ERROR.
# "NULL" is quoted in the option lists so that YAML loads it as the string
# NULL rather than as a null value.
-
name: "divide"
description: >
Divide x by y. Partial values are truncated (i.e. rounded towards 0).
The `on_division_by_zero` option governs behavior in cases where y is 0.
If either x or y are out of range, behavior will be governed by `on_domain_error`.
impls:
- args:
- name: x
value: u!u8
- name: y
value: u!u8
options:
on_domain_error:
values: [ "NULL", ERROR ]
on_division_by_zero:
values: [ "NULL", ERROR ]
return: u!u8
- args:
- name: x
value: u!u16
- name: y
value: u!u16
options:
on_domain_error:
values: [ "NULL", ERROR ]
on_division_by_zero:
values: [ "NULL", ERROR ]
return: u!u16
- args:
- name: x
value: u!u32
- name: y
value: u!u32
options:
on_domain_error:
values: [ "NULL", ERROR ]
on_division_by_zero:
values: [ "NULL", ERROR ]
return: u!u32
- args:
- name: x
value: u!u64
- name: y
value: u!u64
options:
on_domain_error:
values: [ "NULL", ERROR ]
on_division_by_zero:
values: [ "NULL", ERROR ]
return: u!u64

aggregate_functions:
# sum: accepts a single argument x of type u8, u16, u32 or u64. Every overload
# declares both its intermediate and its return type as the nullable `u!u64?`,
# regardless of the input width.
# All overloads use nullability DECLARED_OUTPUT and decomposable MANY, and
# expose an `overflow` option (SILENT, SATURATE, ERROR).
- name: "sum"
description: Sum a set of unsigned integer values. The sum of zero elements yields null.
Comment thread
kadinrabo marked this conversation as resolved.
impls:
- args:
- name: x
value: u!u8
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u64?
return: u!u64?
- args:
- name: x
value: u!u16
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u64?
return: u!u64?
- args:
- name: x
value: u!u32
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u64?
return: u!u64?
- args:
- name: x
value: u!u64
options:
overflow:
values: [ SILENT, SATURATE, ERROR ]
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u64?
return: u!u64?
# min: accepts a single argument x of type u8, u16, u32 or u64. Each overload
# declares its intermediate and return type as the nullable (`?`) form of its
# input type.
# All overloads use nullability DECLARED_OUTPUT and decomposable MANY; no
# options are defined.
- name: "min"
description: Min of a set of unsigned integer values.
impls:
- args:
- name: x
value: u!u8
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u8?
return: u!u8?
- args:
- name: x
value: u!u16
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u16?
return: u!u16?
- args:
- name: x
value: u!u32
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u32?
return: u!u32?
- args:
- name: x
value: u!u64
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u64?
return: u!u64?
# max: accepts a single argument x of type u8, u16, u32 or u64. Each overload
# declares its intermediate and return type as the nullable (`?`) form of its
# input type.
# All overloads use nullability DECLARED_OUTPUT and decomposable MANY; no
# options are defined.
- name: "max"
description: Max of a set of unsigned integer values.
impls:
- args:
- name: x
value: u!u8
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u8?
return: u!u8?
- args:
- name: x
value: u!u16
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u16?
return: u!u16?
- args:
- name: x
value: u!u32
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u32?
return: u!u32?
- args:
- name: x
value: u!u64
nullability: DECLARED_OUTPUT
decomposable: MANY
intermediate: u!u64?
return: u!u64?
5 changes: 5 additions & 0 deletions grammar/FuncTestCaseParser.g4
Comment thread
kadinrabo marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ argument
| precisionTimestampTZArg
| listArg
| lambdaArg
| udtArg
| Identifier // Bare identifiers (for lambda parameters)
;

Expand Down Expand Up @@ -217,6 +218,10 @@ lambdaArg
: literalLambda DoubleColon funcType
;

// Argument typed with a user-defined type: a literal, then the DoubleColon
// token, the UserDefined token and the type's Identifier, optionally followed
// by a QMark that is captured under the label `isnull`.
udtArg
: literal DoubleColon UserDefined Identifier isnull=QMark?
;

literalList
: OBracket (literal (Comma literal)*)? CBracket
;
Expand Down
2 changes: 1 addition & 1 deletion site/docs/extensions/generate_function_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def write_markdown(file_obj: dict, file_name: str) -> None:
mdFile.new_line(f"{key}: {value}")

for function_classification, value in file_obj.items():
if function_classification == "urn":
if function_classification in ("urn", "dependencies"):
continue
function_classification_str = function_classification.replace("_", " ").title()
mdFile.new_header(level=2, title=f"{function_classification_str}")
Expand Down
16 changes: 8 additions & 8 deletions tests/baseline.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"registry": {
"extension_count": 15,
"dependency_count": 15,
"extension_count": 16,
"dependency_count": 18,
"function_count": 170,
"num_aggregate_functions": 29,
"num_scalar_functions": 166,
"num_aggregate_functions": 32,
"num_scalar_functions": 170,
"num_window_functions": 11,
"num_function_overloads": 529
"num_function_overloads": 5013
},
"coverage": {
"total_test_count": 1136,
"num_function_variants": 529,
"num_covered_function_variants": 241
"total_test_count": 1198,
"num_function_variants": 557,
"num_covered_function_variants": 269
}
}
16 changes: 16 additions & 0 deletions tests/cases/arithmetic_unsigned/add.test
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to have null cases added to these tests? Might be overkill but can't hurt.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to keep parity with the signed tests here, which don't have null cases for add/subtract/multiply either. I'm open to adding them if you feel strongly, but that might be better as a follow-up.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm okay with not doing this for now.

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
Comment thread
vbarua marked this conversation as resolved.
### SUBSTRAIT_INCLUDE: '/extensions/unsigned_integers.yaml'

# basic: Basic unsigned integer examples
add('200'::u!u8, '50'::u!u8) = '250'::u!u8
add('50000'::u!u16, '10000'::u!u16) = '60000'::u!u16
add('3000000000'::u!u32, '1000000000'::u!u32) = '4000000000'::u!u32
add('10000000000000000000'::u!u64, '1000000000000000000'::u!u64) = '11000000000000000000'::u!u64

# overflow: Examples demonstrating overflow behavior
add('200'::u!u8, '100'::u!u8) [overflow:ERROR] = <!ERROR>
add('60000'::u!u16, '10000'::u!u16) [overflow:ERROR] = <!ERROR>
add('4000000000'::u!u32, '1000000000'::u!u32) [overflow:ERROR] = <!ERROR>
add('18446744073709551615'::u!u64, '1'::u!u64) [overflow:ERROR] = <!ERROR>
add('200'::u!u8, '100'::u!u8) [overflow:SATURATE] = '255'::u!u8
add('200'::u!u8, '100'::u!u8) [overflow:SILENT] = <!UNDEFINED>
12 changes: 12 additions & 0 deletions tests/cases/arithmetic_unsigned/divide.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/unsigned_integers.yaml'

# basic: Basic unsigned integer examples
divide('250'::u!u8, '5'::u!u8) = '50'::u!u8
divide('60000'::u!u16, '100'::u!u16) = '600'::u!u16
divide('4000000000'::u!u32, '200'::u!u32) = '20000000'::u!u32
divide('10000000000000000000'::u!u64, '5000'::u!u64) = '2000000000000000'::u!u64

# division_by_zero: Examples demonstrating division by zero
divide('5'::u!u8, '0'::u!u8) [on_division_by_zero:NULL] = null::u!u8
divide('5'::u!u8, '0'::u!u8) [on_division_by_zero:ERROR] = <!ERROR>
Loading