diff --git a/csharp/src/StatementExecution/StatementExecutionConnection.cs b/csharp/src/StatementExecution/StatementExecutionConnection.cs
index 2a112f41a..5a510a5e8 100644
--- a/csharp/src/StatementExecution/StatementExecutionConnection.cs
+++ b/csharp/src/StatementExecution/StatementExecutionConnection.cs
@@ -18,6 +18,8 @@
 using System.Collections.Generic;
 using System.Linq;
 using System.Net.Http;
+using System.Text;
+using System.Text.RegularExpressions;
 using System.Threading;
 using System.Threading.Tasks;
 using AdbcDrivers.Databricks.Http;
@@ -589,46 +591,218 @@ async Task> IGetObjectsDataProvider.GetCatalogsAsync(strin
 
         async Task> IGetObjectsDataProvider.GetSchemasAsync(string? catalogPattern, string? schemaPattern, CancellationToken cancellationToken)
         {
-            // Note: catalogPattern comes from GetObjectsResultBuilder which resolves individual
-            // catalog names before calling this method. Despite the "pattern" name (from the
-            // IGetObjectsDataProvider interface), the value passed to ShowSchemasCommand is used
-            // as a literal catalog identifier (backtick-quoted), not a wildcard pattern.
-            string sql = new ShowSchemasCommand(catalogPattern, schemaPattern).Build();
-            var batches = await ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);
+            // When catalogPattern is null, use SHOW SCHEMAS IN ALL CATALOGS.
+            // When catalogPattern contains unescaped wildcards (% or _), fetch all schemas and
+            // filter by catalog pattern client-side to avoid invalid SQL like SHOW SCHEMAS IN `%`.
+            // When catalogPattern is an empty string or a literal that doesn't exist on the server,
+            // return an empty rowset rather than propagating the server error.
 
-            // SHOW SCHEMAS IN ALL CATALOGS returns 2 columns: databaseName, catalog
-            // SHOW SCHEMAS IN `catalog` returns 1 column: databaseName
-            bool showSchemasInAllCatalogs = catalogPattern == null;
+            if (catalogPattern == null)
+            {
+                // SHOW SCHEMAS IN ALL CATALOGS — existing behavior, returns (databaseName, catalog).
+                string sql = new ShowSchemasCommand(null, schemaPattern).Build();
+                var batches = await ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);
+                return ExtractSchemasFromAllCatalogs(batches, catalogFilter: null);
+            }
 
-            var result = new List<(string, string)>();
-            foreach (var batch in batches)
+            if (catalogPattern.Length == 0)
             {
-                StringArray? catalogArray = null;
-                StringArray? schemaArray = null;
+                // Empty string cannot match any real catalog name.
+                return System.Array.Empty<(string, string)>();
+            }
 
-                if (showSchemasInAllCatalogs)
-                {
-                    schemaArray = batch.Column(0) as StringArray;
-                    catalogArray = batch.Column(1) as StringArray;
-                }
-                else
+            if (ContainsUnescapedWildcard(catalogPattern))
+            {
+                // Wildcard catalog pattern: fetch all schemas, then filter by catalog regex.
+                string sql = new ShowSchemasCommand(null, schemaPattern).Build();
+                var batches = await ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);
+                Regex catalogRegex = CatalogPatternToRegex(catalogPattern);
+                return ExtractSchemasFromAllCatalogs(batches, catalogRegex);
+            }
+
+            // Literal catalog name (possibly with \_ or \% escapes decoded): try the direct query.
+            // Decode ADBC escape sequences: \_ → _, \% → %
+            string literalCatalog = DecodeLiteralPattern(catalogPattern);
+            try
+            {
+                string sql = new ShowSchemasCommand(literalCatalog, schemaPattern).Build();
+                var batches = await ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);
+                // SHOW SCHEMAS IN `catalog` returns 1 column: databaseName
+                var result = new List<(string, string)>();
+                foreach (var batch in batches)
                 {
-                    schemaArray = batch.Column(0) as StringArray;
+                    var schemaArray = batch.Column(0) as StringArray;
+                    if (schemaArray == null) continue;
+                    for (int i = 0; i < batch.Length; i++)
+                    {
+                        if (!schemaArray.IsNull(i))
+                            result.Add((literalCatalog, schemaArray.GetString(i)));
+                    }
                 }
+                return result;
+            }
+            catch (Exception ex) when (IsNotFoundOrInvalidIdentifierException(ex))
+            {
+                // The catalog doesn't exist or the name is invalid — return empty rowset per spec.
+                return System.Array.Empty<(string, string)>();
+            }
+        }
 
+        /// <summary>
+        /// Extracts (catalog, schema) pairs from SHOW SCHEMAS IN ALL CATALOGS result batches.
+        /// If <paramref name="catalogFilter"/> is non-null, only rows whose catalog matches are included.
+        /// </summary>
+        private static List<(string, string)> ExtractSchemasFromAllCatalogs(
+            IEnumerable<RecordBatch> batches, // NOTE(review): element type was stripped in this copy of the patch; RecordBatch assumed — confirm
+            Regex? catalogFilter)
+        {
+            var result = new List<(string, string)>();
+            foreach (var batch in batches)
+            {
+                // SHOW SCHEMAS IN ALL CATALOGS: column 0 = databaseName, column 1 = catalog
+                var schemaArray = batch.Column(0) as StringArray;
+                var catalogArray = batch.Column(1) as StringArray;
                 if (schemaArray == null) continue;
                 for (int i = 0; i < batch.Length; i++)
                 {
                     if (schemaArray.IsNull(i)) continue;
                     string catalog = catalogArray != null && !catalogArray.IsNull(i)
                         ? catalogArray.GetString(i)
-                        : catalogPattern ?? "";
+                        : "";
+                    if (catalogFilter != null && !catalogFilter.IsMatch(catalog))
+                        continue;
                     result.Add((catalog, schemaArray.GetString(i)));
                 }
             }
             return result;
         }
 
+        /// <summary>
+        /// Returns true if the ADBC SQL pattern contains an unescaped <c>%</c> or <c>_</c>
+        /// wildcard character. A <c>\</c> followed by <c>%</c> or <c>_</c> is an escaped literal.
+        /// </summary>
+        internal static bool ContainsUnescapedWildcard(string pattern)
+        {
+            for (int i = 0; i < pattern.Length; i++)
+            {
+                char c = pattern[i];
+                if (c == '\\')
+                {
+                    i++; // skip next char (it is escaped)
+                    continue;
+                }
+                if (c == '%' || c == '_')
+                    return true;
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Converts an ADBC/SQL wildcard pattern to a case-insensitive, whole-string <see cref="Regex"/>.
+        /// <list type="bullet">
+        /// <item><c>%</c> → <c>.*</c> (any sequence of characters)</item>
+        /// <item><c>_</c> → <c>.</c> (any single character)</item>
+        /// <item><c>\_</c> → <c>_</c> (literal underscore)</item>
+        /// <item><c>\%</c> → <c>%</c> (literal percent)</item>
+        /// <item><c>\\</c> → <c>\\</c> (literal backslash)</item>
+        /// </list>
+        /// </summary>
+        internal static Regex CatalogPatternToRegex(string pattern)
+        {
+            var sb = new StringBuilder("^");
+            for (int i = 0; i < pattern.Length; i++)
+            {
+                char c = pattern[i];
+                if (c == '\\' && i + 1 < pattern.Length)
+                {
+                    char next = pattern[i + 1];
+                    if (next == '_' || next == '%' || next == '\\')
+                    {
+                        sb.Append(Regex.Escape(next.ToString()));
+                        i++;
+                        continue;
+                    }
+                    // Lone backslash — treat as literal.
+                    sb.Append(Regex.Escape("\\"));
+                }
+                else if (c == '%')
+                {
+                    sb.Append(".*");
+                }
+                else if (c == '_')
+                {
+                    sb.Append('.');
+                }
+                else
+                {
+                    sb.Append(Regex.Escape(c.ToString()));
+                }
+            }
+            // \z anchors at the true end of input; '$' would also accept a name followed by a trailing '\n'.
+            sb.Append("\\z");
+            return new Regex(sb.ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Singleline);
+        }
+
+        /// <summary>
+        /// Decodes ADBC escape sequences in a pattern that contains no unescaped wildcards,
+        /// returning the literal string it represents.
+        /// <c>\_</c> → <c>_</c>, <c>\%</c> → <c>%</c>, <c>\\</c> → <c>\</c>.
+        /// </summary>
+        internal static string DecodeLiteralPattern(string pattern)
+        {
+            if (!pattern.Contains('\\'))
+                return pattern;
+
+            var sb = new StringBuilder(pattern.Length);
+            for (int i = 0; i < pattern.Length; i++)
+            {
+                char c = pattern[i];
+                if (c == '\\' && i + 1 < pattern.Length)
+                {
+                    char next = pattern[i + 1];
+                    if (next == '_' || next == '%' || next == '\\')
+                    {
+                        sb.Append(next);
+                        i++;
+                        continue;
+                    }
+                }
+                sb.Append(c);
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Returns true for exceptions that may indicate the catalog name is not found or is
+        /// syntactically invalid as an identifier (e.g., a nonexistent catalog).
+        /// </summary>
+        private static bool IsNotFoundOrInvalidIdentifierException(Exception ex)
+        {
+            // The SEA backend reports a failed SQL statement (e.g. "SHOW SCHEMAS IN `nonexistent`")
+            // as a DatabricksException, which extends AdbcException. Anything that is not an
+            // AdbcException is unrelated to the catalog name and must propagate.
+            if (!(ex is AdbcException adbcException))
+                return false;
+
+            // AdbcException also carries failures that have nothing to do with the catalog name.
+            // Swallowing those would turn an expired token, a missing grant, a cancelled call or a
+            // timeout into a silently empty rowset, so they are never treated as "not found".
+            switch (adbcException.Status)
+            {
+                case AdbcStatusCode.Unauthenticated:
+                case AdbcStatusCode.Unauthorized:
+                case AdbcStatusCode.Cancelled:
+                case AdbcStatusCode.Timeout:
+                case AdbcStatusCode.IOError:
+                    return false;
+                default:
+                    // NOTE(review): the status the backend sets for a missing catalog is not
+                    // visible in this file, so every remaining status still maps to "not found".
+                    // Narrow this to the exact status/SQLSTATE once that is confirmed.
+                    return true;
+            }
+        }
+
         async Task> IGetObjectsDataProvider.GetTablesAsync(
             string? catalogPattern, string? schemaPattern, string? tableNamePattern, IReadOnlyList? tableTypes, CancellationToken cancellationToken)
         {
diff --git a/csharp/test/Unit/StatementExecution/GetSchemasAsyncTests.cs b/csharp/test/Unit/StatementExecution/GetSchemasAsyncTests.cs
new file mode 100644
index 000000000..0fc9ad6b7
--- /dev/null
+++ b/csharp/test/Unit/StatementExecution/GetSchemasAsyncTests.cs
@@ -0,0 +1,230 @@
+/*
+* Copyright (c) 2025 ADBC Drivers Contributors
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System.Text.RegularExpressions;
+using AdbcDrivers.Databricks.StatementExecution;
+using Xunit;
+
+namespace AdbcDrivers.Databricks.Tests.Unit.StatementExecution
+{
+    /// <summary>
+    /// Unit tests for the helper methods introduced to fix PECO-3017:
+    /// <see cref="StatementExecutionConnection.ContainsUnescapedWildcard"/>,
+    /// <see cref="StatementExecutionConnection.CatalogPatternToRegex"/>, and
+    /// <see cref="StatementExecutionConnection.DecodeLiteralPattern"/>.
+    ///
+    /// These helpers enable GetSchemasAsync to correctly handle wildcard catalog
+    /// patterns (%, _), escaped literals (\_), empty strings, and nonexistent
+    /// catalog names without throwing server-side exceptions.
+    /// </summary>
+    public class GetSchemasAsyncTests
+    {
+        // ----------------------------------------------------------------
+        // ContainsUnescapedWildcard
+        // ----------------------------------------------------------------
+
+        [Theory]
+        [InlineData("%", true)]                    // bare % is a wildcard
+        [InlineData("compar%", true)]              // trailing %
+        [InlineData("_", true)]                    // bare _ is a wildcard
+        [InlineData("comparator_tests", true)]     // unescaped _ is a wildcard
+        [InlineData(@"comparator\_tests", false)]  // \_ is escaped, not a wildcard
+        [InlineData(@"\%", false)]                 // \% is escaped, not a wildcard
+        [InlineData("", false)]                    // empty — no wildcard
+        [InlineData("nonexistent", false)]         // plain literal — no wildcard
+        [InlineData("main", false)]                // plain literal — no wildcard
+        [InlineData(@"\\", false)]                 // escaped backslash — no wildcard
+        public void ContainsUnescapedWildcard_ReturnsExpected(string pattern, bool expected)
+        {
+            Assert.Equal(expected, StatementExecutionConnection.ContainsUnescapedWildcard(pattern));
+        }
+
+        // ----------------------------------------------------------------
+        // CatalogPatternToRegex — wildcard expansion
+        // ----------------------------------------------------------------
+
+        [Fact]
+        public void CatalogPatternToRegex_PercentMatchesAllCatalogs()
+        {
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("%");
+            Assert.Matches(regex, "main");
+            Assert.Matches(regex, "comparator_tests");
+            Assert.Matches(regex, "hive_metastore");
+            Assert.Matches(regex, "");
+        }
+
+        [Fact]
+        public void CatalogPatternToRegex_PercentPrefix_MatchesCatalogsStartingWithPrefix()
+        {
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("compar%");
+            Assert.Matches(regex, "comparator_tests");
+            Assert.Matches(regex, "comparator-tests");
+            Assert.Matches(regex, "compar");
+            Assert.DoesNotMatch(regex, "main");
+            Assert.DoesNotMatch(regex, "xcompar");
+        }
+
+        [Fact]
+        public void CatalogPatternToRegex_UnderscoreMatchesSingleChar()
+        {
+            // "comparator_tests" pattern: _ matches any single char,
+            // so it should match "comparator_tests" and "comparator-tests".
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("comparator_tests");
+            Assert.Matches(regex, "comparator_tests");
+            Assert.Matches(regex, "comparator-tests");
+            Assert.Matches(regex, "comparatorXtests");
+            Assert.DoesNotMatch(regex, "comparatortests");  // _ requires exactly one char
+            Assert.DoesNotMatch(regex, "comparator__tests"); // one char too many
+        }
+
+        [Fact]
+        public void CatalogPatternToRegex_EscapedUnderscore_MatchesLiteralUnderscore()
+        {
+            // "comparator\_tests": \_ is a literal underscore, not a single-char wildcard.
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex(@"comparator\_tests");
+            Assert.Matches(regex, "comparator_tests");
+            Assert.DoesNotMatch(regex, "comparator-tests");
+            Assert.DoesNotMatch(regex, "comparatorXtests");
+        }
+
+        [Fact]
+        public void CatalogPatternToRegex_EscapedPercent_MatchesLiteralPercent()
+        {
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex(@"cat\%log");
+            Assert.Matches(regex, "cat%log");
+            Assert.DoesNotMatch(regex, "catalog");
+            Assert.DoesNotMatch(regex, "catXlog");
+        }
+
+        [Fact]
+        public void CatalogPatternToRegex_IsCaseInsensitive()
+        {
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("MAIN");
+            Assert.Matches(regex, "main");
+            Assert.Matches(regex, "Main");
+            Assert.Matches(regex, "MAIN");
+        }
+
+        [Fact]
+        public void CatalogPatternToRegex_RegexSpecialCharsInPattern_AreEscaped()
+        {
+            // Dots in a catalog name must be treated as literals, not regex wildcards.
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("my.catalog");
+            Assert.Matches(regex, "my.catalog");
+            Assert.DoesNotMatch(regex, "myXcatalog"); // dot is literal, not regex wildcard
+        }
+
+        // ----------------------------------------------------------------
+        // DecodeLiteralPattern
+        // ----------------------------------------------------------------
+
+        [Fact]
+        public void DecodeLiteralPattern_NoEscapes_ReturnsSameString()
+        {
+            Assert.Equal("comparator_tests", StatementExecutionConnection.DecodeLiteralPattern("comparator_tests"));
+        }
+
+        [Fact]
+        public void DecodeLiteralPattern_EscapedUnderscore_ReturnsLiteralUnderscore()
+        {
+            Assert.Equal("comparator_tests", StatementExecutionConnection.DecodeLiteralPattern(@"comparator\_tests"));
+        }
+
+        [Fact]
+        public void DecodeLiteralPattern_EscapedPercent_ReturnsLiteralPercent()
+        {
+            Assert.Equal("cat%log", StatementExecutionConnection.DecodeLiteralPattern(@"cat\%log"));
+        }
+
+        [Fact]
+        public void DecodeLiteralPattern_EscapedBackslash_ReturnsLiteralBackslash()
+        {
+            Assert.Equal(@"back\slash", StatementExecutionConnection.DecodeLiteralPattern(@"back\\slash"));
+        }
+
+        [Fact]
+        public void DecodeLiteralPattern_EmptyString_ReturnsEmptyString()
+        {
+            Assert.Equal("", StatementExecutionConnection.DecodeLiteralPattern(""));
+        }
+
+        [Fact]
+        public void DecodeLiteralPattern_LoneTrailingBackslash_PreservedAsBackslash()
+        {
+            // A lone trailing backslash with nothing after it is preserved as-is.
+            Assert.Equal(@"test\", StatementExecutionConnection.DecodeLiteralPattern(@"test\"));
+        }
+
+        // ----------------------------------------------------------------
+        // Round-trip: ContainsUnescapedWildcard + CatalogPatternToRegex
+        // covering the six bug-report cases from PECO-3017
+        // ----------------------------------------------------------------
+
+        [Fact]
+        public void BugReport_PercentPattern_DetectedAsWildcard()
+        {
+            // "%" should be recognized as a wildcard so GetSchemasAsync fetches all and filters.
+            Assert.True(StatementExecutionConnection.ContainsUnescapedWildcard("%"));
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("%");
+            Assert.Matches(regex, "any_catalog");
+        }
+
+        [Fact]
+        public void BugReport_ComparPercent_DetectedAsWildcard()
+        {
+            // "compar%" should be a wildcard prefix match.
+            Assert.True(StatementExecutionConnection.ContainsUnescapedWildcard("compar%"));
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("compar%");
+            Assert.Matches(regex, "comparator_tests");
+            Assert.DoesNotMatch(regex, "main");
+        }
+
+        [Fact]
+        public void BugReport_EscapedUnderscore_NotAWildcard_DecodesCorrectly()
+        {
+            // "comparator\_tests": no wildcard → use literal catalog name.
+            Assert.False(StatementExecutionConnection.ContainsUnescapedWildcard(@"comparator\_tests"));
+            string literal = StatementExecutionConnection.DecodeLiteralPattern(@"comparator\_tests");
+            Assert.Equal("comparator_tests", literal);
+        }
+
+        [Fact]
+        public void BugReport_UnescapedUnderscore_IsSingleCharWildcard()
+        {
+            // "comparator_tests" (no escape): _ is a wildcard matching any single char.
+            Assert.True(StatementExecutionConnection.ContainsUnescapedWildcard("comparator_tests"));
+            Regex regex = StatementExecutionConnection.CatalogPatternToRegex("comparator_tests");
+            // Matches both "comparator_tests" and "comparator-tests"
+            Assert.Matches(regex, "comparator_tests");
+            Assert.Matches(regex, "comparator-tests");
+        }
+
+        [Fact]
+        public void BugReport_EmptyString_NotAWildcard()
+        {
+            // Empty string: no wildcard; GetSchemasAsync returns empty rowset immediately.
+            Assert.False(StatementExecutionConnection.ContainsUnescapedWildcard(""));
+        }
+
+        [Fact]
+        public void BugReport_NonexistentCatalog_NotAWildcard()
+        {
+            // "nonexistent": no wildcard; GetSchemasAsync attempts direct query and
+            // catches the server error, returning an empty rowset.
+            Assert.False(StatementExecutionConnection.ContainsUnescapedWildcard("nonexistent"));
+        }
+    }
+}