substrait-io · kadinrabo · Jan 29, 2026 · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026
@@ -0,0 +1,309 @@
+%YAML 1.2
+---
+urn: "extension:io.substrait:unsigned_integers"
+
+types:  # Four unsigned widths; each declares a structure with one `value: str` field.
+  - name: u8
+    description: >
+      Unsigned 8-bit integer (0 to 255).
+      Values are encoded as decimal strings in the structure representation.
+    structure:
+      value: str  # carries the number as decimal text, per the description
+  - name: u16
+    description: >
+      Unsigned 16-bit integer (0 to 65535).
+      Values are encoded as decimal strings in the structure representation.
+    structure:
+      value: str
+  - name: u32
+    description: >
+      Unsigned 32-bit integer (0 to 4294967295).
+      Values are encoded as decimal strings in the structure representation.
+    structure:
+      value: str
+  - name: u64
+    description: >
+      Unsigned 64-bit integer (0 to 18446744073709551615).
+      Values are encoded as decimal strings in the structure representation.
+    structure:
+      value: str  # same single-field layout as the narrower widths
+
+scalar_functions:
+  -
+    name: "add"  # one impl per width; both operands and the result share one type
+    description: "Add two unsigned integer values."
+    impls:
+      - args:
+          - name: x
+            value: u!u8  # presumably `u!` marks a user-defined type, here u8 from `types` — confirm
+          - name: y
+            value: u!u8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]  # same three choices on every width
+        return: u!u8
+      - args:
+          - name: x
+            value: u!u16
+          - name: y
+            value: u!u16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u16
+      - args:
+          - name: x
+            value: u!u32
+          - name: y
+            value: u!u32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u32
+      - args:
+          - name: x
+            value: u!u64
+          - name: y
+            value: u!u64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u64  # no widening: a u64 sum stays u64
+  -
+    name: "subtract"  # one impl per width; both operands and the result share one type
+    description: "Subtract one unsigned integer value from another."
+    impls:
+      - args:
+          - name: x
+            value: u!u8
+          - name: y
+            value: u!u8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]  # NOTE(review): presumably also covers results below 0 — confirm
+        return: u!u8
+      - args:
+          - name: x
+            value: u!u16
+          - name: y
+            value: u!u16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u16
+      - args:
+          - name: x
+            value: u!u32
+          - name: y
+            value: u!u32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u32
+      - args:
+          - name: x
+            value: u!u64
+          - name: y
+            value: u!u64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u64
+  -
+    name: "multiply"  # one impl per width; both operands and the result share one type
+    description: "Multiply two unsigned integer values."
+    impls:
+      - args:
+          - name: x
+            value: u!u8
+          - name: y
+            value: u!u8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]  # same option set as add and subtract
+        return: u!u8  # result is not widened, so products can exceed the return type
+      - args:
+          - name: x
+            value: u!u16
+          - name: y
+            value: u!u16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u16
+      - args:
+          - name: x
+            value: u!u32
+          - name: y
+            value: u!u32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u32
+      - args:
+          - name: x
+            value: u!u64
+          - name: y
+            value: u!u64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: u!u64
+  -
+    name: "divide"  # x is the dividend, y the divisor; no overflow option is declared here
+    description: >
+      Divide x by y. Partial values are truncated (i.e. rounded towards 0).
+      The `on_division_by_zero` option governs behavior in cases where y is 0.
+      If either x or y are out of range, behavior will be governed by `on_domain_error`.
+    impls:
+      - args:
+          - name: x
+            value: u!u8
+          - name: y
+            value: u!u8
+        options:
+          on_domain_error:
+            values: [ "NULL", ERROR ]  # "NULL" is quoted so YAML reads a string, not a null
+          on_division_by_zero:
+            values: [ "NULL", ERROR ]  # choices for the y == 0 case named in the description
+        return: u!u8
+      - args:
+          - name: x
+            value: u!u16
+          - name: y
+            value: u!u16
+        options:
+          on_domain_error:
+            values: [ "NULL", ERROR ]
+          on_division_by_zero:
+            values: [ "NULL", ERROR ]
+        return: u!u16
+      - args:
+          - name: x
+            value: u!u32
+          - name: y
+            value: u!u32
+        options:
+          on_domain_error:
+            values: [ "NULL", ERROR ]
+          on_division_by_zero:
+            values: [ "NULL", ERROR ]
+        return: u!u32
+      - args:
+          - name: x
+            value: u!u64
+          - name: y
+            value: u!u64
+        options:
+          on_domain_error:
+            values: [ "NULL", ERROR ]
+          on_division_by_zero:
+            values: [ "NULL", ERROR ]
+        return: u!u64
+
+aggregate_functions:
+  - name: "sum"  # every input width accumulates into, and returns, nullable u64
+    description: Sum a set of unsigned integer values. The sum of zero elements yields null.
+    impls:
+      - args:
+          - name: x
+            value: u!u8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT  # NOTE(review): presumably "use the declared return nullability" — confirm
+        decomposable: MANY  # NOTE(review): presumably permits multi-stage partial aggregation — confirm
+        intermediate: u!u64?  # partial sums are held in the widest type
+        return: u!u64?  # nullable because an empty input yields null (see description)
+      - args:
+          - name: x
+            value: u!u16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u64?
+        return: u!u64?
+      - args:
+          - name: x
+            value: u!u32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u64?
+        return: u!u64?
+      - args:
+          - name: x
+            value: u!u64  # only width whose inputs already fill the accumulator type
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u64?
+        return: u!u64?
+  - name: "min"  # one impl per width; no options are declared
+    description: Min of a set of unsigned integer values.
+    impls:
+      - args:
+          - name: x
+            value: u!u8
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u8?  # partial result keeps the input width, made nullable
+        return: u!u8?  # NOTE(review): presumably null for an empty input set — confirm
+      - args:
+          - name: x
+            value: u!u16
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u16?
+        return: u!u16?
+      - args:
+          - name: x
+            value: u!u32
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u32?
+        return: u!u32?
+      - args:
+          - name: x
+            value: u!u64
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u64?
+        return: u!u64?
+  - name: "max"  # one impl per width; no options are declared
+    description: Max of a set of unsigned integer values.
+    impls:
+      - args:
+          - name: x
+            value: u!u8
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u8?  # partial result keeps the input width, made nullable
+        return: u!u8?  # NOTE(review): presumably null for an empty input set — confirm
+      - args:
+          - name: x
+            value: u!u16
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u16?
+        return: u!u16?
+      - args:
+          - name: x
+            value: u!u32
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u32?
+        return: u!u32?
+      - args:
+          - name: x
+            value: u!u64
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: u!u64?
+        return: u!u64?
@@ -70,6 +70,7 @@ argument
     | precisionTimestampTZArg
     | listArg
     | lambdaArg
+    | udtArg
     | Identifier  // Bare identifiers (for lambda parameters)
     ;
 
@@ -217,6 +218,10 @@ lambdaArg
     : literalLambda DoubleColon funcType
     ;
 
+udtArg  // a literal tagged with a user-defined type name, e.g. '200'::u!u8
+    : literal DoubleColon UserDefined Identifier isnull=QMark?  // assumes UserDefined lexes "u!" (TODO confirm); an optional trailing ? is captured as isnull
+    ;
+
 literalList
     : OBracket (literal (Comma literal)*)? CBracket
     ;

@@ -21,7 +21,7 @@ def write_markdown(file_obj: dict, file_name: str) -> None:
                 mdFile.new_line(f"{key}: {value}")
 
     for function_classification, value in file_obj.items():
-        if function_classification == "urn":
+        if function_classification in ("urn", "dependencies"):
             continue
         function_classification_str = function_classification.replace("_", " ").title()
         mdFile.new_header(level=2, title=f"{function_classification_str}")

@@ -1,16 +1,16 @@
 {
   "registry": {
-    "extension_count": 15,
-    "dependency_count": 15,
+    "extension_count": 16,
+    "dependency_count": 18,
     "function_count": 170,
-    "num_aggregate_functions": 29,
-    "num_scalar_functions": 166,
+    "num_aggregate_functions": 32,
+    "num_scalar_functions": 170,
     "num_window_functions": 11,
-    "num_function_overloads": 529
+    "num_function_overloads": 557
   },
   "coverage": {
-    "total_test_count": 1136,
-    "num_function_variants": 529,
-    "num_covered_function_variants": 241
+    "total_test_count": 1198,
+    "num_function_variants": 557,
+    "num_covered_function_variants": 269
   }
 }
@@ -0,0 +1,16 @@
+### SUBSTRAIT_SCALAR_TEST: v1.0
+### SUBSTRAIT_INCLUDE: '/extensions/unsigned_integers.yaml'
+
+# basic: Basic unsigned integer examples
+add('200'::u!u8, '50'::u!u8) = '250'::u!u8
+add('50000'::u!u16, '10000'::u!u16) = '60000'::u!u16
+add('3000000000'::u!u32, '1000000000'::u!u32) = '4000000000'::u!u32
+add('10000000000000000000'::u!u64, '1000000000000000000'::u!u64) = '11000000000000000000'::u!u64
+
+# overflow: Examples demonstrating overflow behavior
+add('200'::u!u8, '100'::u!u8) [overflow:ERROR] = <!ERROR>
+add('60000'::u!u16, '10000'::u!u16) [overflow:ERROR] = <!ERROR>
+add('4000000000'::u!u32, '1000000000'::u!u32) [overflow:ERROR] = <!ERROR>
+add('18446744073709551615'::u!u64, '1'::u!u64) [overflow:ERROR] = <!ERROR>
+add('200'::u!u8, '100'::u!u8) [overflow:SATURATE] = '255'::u!u8
+add('200'::u!u8, '100'::u!u8) [overflow:SILENT] = <!UNDEFINED>
@@ -0,0 +1,12 @@
+### SUBSTRAIT_SCALAR_TEST: v1.0
+### SUBSTRAIT_INCLUDE: '/extensions/unsigned_integers.yaml'
+
+# basic: Basic unsigned integer examples
+divide('250'::u!u8, '5'::u!u8) = '50'::u!u8
+divide('60000'::u!u16, '100'::u!u16) = '600'::u!u16
+divide('4000000000'::u!u32, '200'::u!u32) = '20000000'::u!u32
+divide('10000000000000000000'::u!u64, '5000'::u!u64) = '2000000000000000'::u!u64
+
+# division_by_zero: Examples demonstrating division by zero
+divide('5'::u!u8, '0'::u!u8) [on_division_by_zero:NULL] = null::u!u8
+divide('5'::u!u8, '0'::u!u8) [on_division_by_zero:ERROR] = <!ERROR>