diff --git a/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java b/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java index 9caa2125427..4554b3d060d 100644 --- a/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java +++ b/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java @@ -55,6 +55,11 @@ public PreparedStatement compile(@NonNull RelNode plan) { } private PreparedStatement doCompile(RelNode plan) throws Exception { + // Apply pre-compilation rules (e.g., late-binding function impl) + for (var rule : context.getLangSpec().preCompilationRules()) { + plan = plan.accept(rule); + } + // Apply shuttle to convert LogicalTableScan to BindableTableScan final RelHomogeneousShuttle shuttle = new RelHomogeneousShuttle() { diff --git a/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java b/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java new file mode 100644 index 00000000000..e1916d33f5b --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java @@ -0,0 +1,186 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import java.util.Objects; +import java.util.function.BiFunction; +import javax.annotation.Nullable; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandMetadata; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; + +/** Fluent DSL for building {@link UnifiedFunctionSpec} instances. */ +@RequiredArgsConstructor +class FunctionSpecBuilder { + /** Function name to register. */ + private final String name; + + /** + * Wraps an existing Calcite operator, preserving its native type system and RexImpTable + * implementation for in-memory execution. + * + * @param op the Calcite operator to delegate to + * @return a builder that produces the spec on {@code build()} + */ + DelegateFunctionBuilder delegateTo(SqlOperator op) { + return new DelegateFunctionBuilder(name, op); + } + + /** + * Builds a pushdown-only UDF with relaxed type checking. The resulting function has no local + * implementation and delegates execution to the data source via pushdown. + * + * @param paramNames required parameter names for signature display + * @return a builder that produces the spec on {@code build()} + */ + CatalogFunctionBuilder vararg(String... paramNames) { + return new CatalogFunctionBuilder(name, List.of(paramNames)); + } + + /** + * Builds a typed SqlFunction with strict operand type checking. Optionally accepts a late-binding + * {@code impl} that rewrites the function into executable Calcite expressions at compilation + * time. + * + * @param families operand type families for validation + * @return a builder that produces the spec on {@code build()} + */ + DefaultFunctionBuilder operands(SqlTypeFamily... families) { + return new DefaultFunctionBuilder(name, families); + } + + @RequiredArgsConstructor + static class DefaultFunctionBuilder { + private final String name; + private final SqlTypeFamily[] operandFamilies; + private SqlReturnTypeInference returnType; + private SqlFunctionCategory category = SqlFunctionCategory.USER_DEFINED_FUNCTION; + private @Nullable BiFunction impl; + + DefaultFunctionBuilder returns(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + DefaultFunctionBuilder category(SqlFunctionCategory cat) { + this.category = cat; + return this; + } + + /** + * Defines how this function executes by rewriting to existing Calcite operators. Applied only + * at compilation time (late binding) — the logical plan preserves the original function call. + * + * @param impl rewrite function that converts this call into executable RexNodes + * @return this builder + */ + DefaultFunctionBuilder impl(BiFunction impl) { + this.impl = impl; + return this; + } + + UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returns() is required"); + SqlFunction op = + new SqlFunction( + name.toUpperCase(), + SqlKind.OTHER_FUNCTION, + returnType, + null, + OperandTypes.family(operandFamilies), + category); + return new UnifiedFunctionSpec(name.toLowerCase(), op, impl); + } + } + + @RequiredArgsConstructor + static class DelegateFunctionBuilder { + private final String name; + private final SqlOperator operator; + + UnifiedFunctionSpec build() { + return new UnifiedFunctionSpec(name.toLowerCase(), operator, null); + } + } + + @RequiredArgsConstructor + static class CatalogFunctionBuilder { + private final String name; + private final List paramNames; + private SqlReturnTypeInference returnType; + + CatalogFunctionBuilder returnType(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returnType is required"); + return new UnifiedFunctionSpec( + name, + new SqlUserDefinedFunction( + new SqlIdentifier(name, SqlParserPos.ZERO), + SqlKind.OTHER_FUNCTION, + returnType, + InferTypes.ANY_NULLABLE, + new VariadicOperandMetadata(paramNames), + List::of), // Pushdown-only: no local implementation + null); + } + } + + /** + * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code + * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts + * any operand types and delegates validation to pushdown. + */ + record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { + + @Override + public List paramNames() { + return paramNames; + } + + @Override + public List paramTypes(RelDataTypeFactory tf) { + return List.of(); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { + return true; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(paramNames.size()); + } + + @Override + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java index e824c89f8de..4009ee13bc0 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java @@ -57,6 +57,15 @@ default List> postParseRules() { default List postAnalysisRules() { return List.of(); } + + /** + * Pre-compilation rules applied only before in-memory execution. Each rule transforms the + * logical plan (e.g., binding function implementations). Not applied when the plan is consumed + * by external engines. + */ + default List preCompilationRules() { + return List.of(); + } } /** @@ -104,4 +113,12 @@ default List> postParseRules() { default List postAnalysisRules() { return extensions().stream().flatMap(ext -> ext.postAnalysisRules().stream()).toList(); } + + /** + * All pre-compilation rules from registered extensions, flattened in registration order. Applied + * only before in-memory execution. + */ + default List preCompilationRules() { + return extensions().stream().flatMap(ext -> ext.preCompilationRules().stream()).toList(); + } } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java index f60fc61a50c..72392b7c520 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java @@ -5,34 +5,36 @@ package org.opensearch.sql.api.spec; +import static org.apache.calcite.sql.SqlFunctionCategory.TIMEDATE; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.LENGTH; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3; +import static org.apache.calcite.sql.fun.SqlStdOperatorTable.FLOOR; +import static org.apache.calcite.sql.type.ReturnTypes.ARG1_NULLABLE; import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; +import static org.apache.calcite.sql.type.SqlTypeFamily.CHARACTER; +import static org.apache.calcite.sql.type.SqlTypeFamily.DATETIME; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Optional; +import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.annotation.Nullable; import lombok.AccessLevel; +import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.RequiredArgsConstructor; import lombok.ToString; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlCallBinding; -import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.avatica.util.TimeUnitRange; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlOperatorTable; -import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.InferTypes; -import org.apache.calcite.sql.type.SqlOperandCountRanges; import org.apache.calcite.sql.type.SqlOperandMetadata; -import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.util.SqlOperatorTables; -import org.apache.calcite.sql.validate.SqlUserDefinedFunction; /** * Declarative registry of language-level functions for the unified query engine. Functions defined @@ -43,7 +45,7 @@ @Getter @ToString(of = "funcName") @EqualsAndHashCode(of = "funcName") -@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +@AllArgsConstructor(access = AccessLevel.PACKAGE) public final class UnifiedFunctionSpec { /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ @@ -52,6 +54,31 @@ public final class UnifiedFunctionSpec { /** Calcite operator for chaining into the framework config's operator table. */ private final SqlOperator operator; + /** Optional late-binding implementation applied only at compilation time. */ + private final @Nullable BiFunction impl; + + /** Common scalar functions beyond standard. */ + public static final Category SCALAR = + new Category( + List.of( + function("length").delegateTo(LENGTH).build(), + function("regexp_replace").delegateTo(REGEXP_REPLACE_3).build(), + function("date_trunc") + .operands(CHARACTER, DATETIME) + .returns(ARG1_NULLABLE) + .category(TIMEDATE) + .impl( + (rexBuilder, call) -> { + RexLiteral unitLiteral = (RexLiteral) call.operands.get(0); + String unit = unitLiteral.getValueAs(String.class); + RexNode datetime = call.operands.get(1); + return rexBuilder.makeCall( + FLOOR, + datetime, + rexBuilder.makeFlag(TimeUnitRange.valueOf(unit.toUpperCase()))); + }) + .build())); + /** Full-text search functions. */ public static final Category RELEVANCE = new Category( @@ -65,8 +92,8 @@ public final class UnifiedFunctionSpec { function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); /** All registered function specs, keyed by function name. */ - private static final Map ALL_SPECS = - Stream.of(RELEVANCE) + public static final Map ALL_SPECS = + Stream.of(SCALAR, RELEVANCE) .flatMap(c -> c.specs().stream()) .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); @@ -101,71 +128,8 @@ public boolean contains(UnifiedFunctionSpec spec) { } } - public static Builder function(String name) { - return new Builder(name); - } - - /** Fluent builder for function specs. */ - @RequiredArgsConstructor(access = AccessLevel.PRIVATE) - public static class Builder { - private final String funcName; - private List paramNames = List.of(); - private SqlReturnTypeInference returnType; - - public Builder vararg(String... names) { - this.paramNames = List.of(names); - return this; - } - - public Builder returnType(SqlReturnTypeInference type) { - this.returnType = type; - return this; - } - - public UnifiedFunctionSpec build() { - Objects.requireNonNull(returnType, "returnType is required"); - return new UnifiedFunctionSpec( - funcName, - new SqlUserDefinedFunction( - new SqlIdentifier(funcName, SqlParserPos.ZERO), - SqlKind.OTHER_FUNCTION, - returnType, - InferTypes.ANY_NULLABLE, - new VariadicOperandMetadata(paramNames), - List::of)); // Pushdown-only: no local implementation - } - } - - /** - * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code - * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts - * any operand types and delegates validation to pushdown. - */ - private record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { - - @Override - public List paramNames() { - return paramNames; - } - - @Override - public List paramTypes(RelDataTypeFactory tf) { - return List.of(); - } - - @Override - public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { - return true; // Bypass: CALCITE-5366 breaks optional argument type checking - } - - @Override - public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.from(paramNames.size()); - } - - @Override - public String getAllowedSignatures(SqlOperator op, String opName) { - return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; - } + /** Entry point for the function spec builder DSL. */ + private static FunctionSpecBuilder function(String name) { + return new FunctionSpecBuilder(name); } } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java index a5433f015fa..28eeaa89abf 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java @@ -16,6 +16,7 @@ import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.sql.validate.SqlValidator; +import org.opensearch.sql.api.spec.core.CoreExtension; import org.opensearch.sql.api.spec.search.SearchExtension; /** @@ -50,7 +51,7 @@ public static UnifiedSqlSpec extended() { Lex.BIG_QUERY, SqlBabelParserImpl.FACTORY, SqlConformanceEnum.BABEL, - List.of(new SearchExtension())); + List.of(new CoreExtension(), new SearchExtension())); } @Override diff --git a/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java new file mode 100644 index 00000000000..17aa8a20bee --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java @@ -0,0 +1,28 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.core; + +import java.util.List; +import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.sql.SqlOperatorTable; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Core extension that extends the default language spec with additional functions and capabilities. + */ +public class CoreExtension implements LanguageSpec.LanguageExtension { + + @Override + public SqlOperatorTable operators() { + return UnifiedFunctionSpec.SCALAR.operatorTable(); + } + + @Override + public List preCompilationRules() { + return List.of(new LateBindingFunctionRule()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java b/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java new file mode 100644 index 00000000000..3294d21a241 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.core; + +import java.util.Map; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.stream.Collectors; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlOperator; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Binds custom function implementations at compilation time by rewriting to executable Calcite + * expressions. + */ +class LateBindingFunctionRule extends RelHomogeneousShuttle { + + /** Operator-to-impl mappings collected from all function specs. */ + private final Map> bindings = + UnifiedFunctionSpec.ALL_SPECS.values().stream() + .filter(spec -> spec.getImpl() != null) + .collect( + Collectors.toMap(UnifiedFunctionSpec::getOperator, UnifiedFunctionSpec::getImpl)); + + @Override + public RelNode visit(RelNode node) { + RelNode visited = super.visit(node); + RexBuilder rexBuilder = node.getCluster().getRexBuilder(); + return visited.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + RexCall visited = (RexCall) super.visitCall(call); + return Optional.ofNullable(bindings.get(visited.getOperator())) + .map(impl -> impl.apply(rexBuilder, visited)) + .orElse(visited); + } + }); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java new file mode 100644 index 00000000000..a16fa116b42 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java @@ -0,0 +1,79 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Timestamp; +import org.apache.calcite.rel.RelNode; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; +import org.opensearch.sql.executor.QueryType; + +/** + * Tests for scalar functions registered in {@link UnifiedFunctionSpec#SCALAR}. Verifies planning + * (function resolves correctly) and execution (produces correct results in-memory). + */ +public class UnifiedFunctionSpecTest extends UnifiedQueryTestBase { + + private UnifiedQueryCompiler compiler; + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Before + public void setUp() { + super.setUp(); + compiler = new UnifiedQueryCompiler(context); + } + + @Test + public void testLength() throws Exception { + assertEquals(5, eval("LENGTH('hello')")); + assertEquals(0, eval("LENGTH('')")); + } + + @Test + public void testRegexpReplace() throws Exception { + assertEquals("XbcXbc", eval("REGEXP_REPLACE('abcabc', 'a', 'X')")); + assertEquals("hello", eval("REGEXP_REPLACE('hello', 'xyz', 'X')")); + } + + @Test + public void testDateTrunc() throws Exception { + // Plan preserves DATE_TRUNC (late binding — not rewritten until compilation) + givenQuery( + "SELECT DATE_TRUNC('minute', TIMESTAMP '2023-01-01 12:34:56') FROM catalog.employees") + .assertPlanContains("DATE_TRUNC('minute', 2023-01-01 12:34:56)"); + + // Execution rewrites to FLOOR and produces truncated timestamp + Object result = eval("DATE_TRUNC('hour', TIMESTAMP '2023-07-15 14:30:45')"); + assertEquals(Timestamp.valueOf("2023-07-15 14:00:00"), result); + } + + @Test + public void testFunctionSpecLookup() { + assertTrue(UnifiedFunctionSpec.of("length").isPresent()); + assertTrue(UnifiedFunctionSpec.of("regexp_replace").isPresent()); + assertTrue(UnifiedFunctionSpec.of("date_trunc").isPresent()); + } + + private Object eval(String expr) throws Exception { + RelNode plan = planner.plan("SELECT " + expr + " AS v FROM (VALUES (0)) AS t(dummy)"); + try (PreparedStatement stmt = compiler.compile(plan); + ResultSet rs = stmt.executeQuery()) { + assertTrue(rs.next()); + return rs.getObject(1); + } + } +}