Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

using System.Text;
using System.Text.RegularExpressions;

namespace AdbcDrivers.Databricks.StatementExecution.MetadataCommands
{
Expand Down Expand Up @@ -99,5 +100,95 @@ protected static void AppendCatalogScope(StringBuilder sql, string? catalog)
else
sql.Append(string.Format(InCatalogFormat, QuoteIdentifier(catalog)));
}

/// <summary>
/// Reports whether <paramref name="pattern"/> contains a SQL LIKE wildcard
/// (<c>%</c> or <c>_</c>) that is not escaped by a preceding backslash.
/// JDBC metadata APIs treat catalog/schema/table arguments as LIKE patterns,
/// but SEA SHOW commands take literal identifiers, so callers use this to
/// decide whether wildcards must be expanded client-side.
/// </summary>
/// <param name="pattern">The JDBC LIKE pattern to inspect; may be null or empty.</param>
/// <returns>
/// <c>true</c> if at least one unescaped <c>%</c> or <c>_</c> is present;
/// <c>false</c> for null, empty, or wildcard-free patterns.
/// </returns>
internal static bool ContainsUnescapedWildcard(string? pattern)
{
    if (pattern == null || pattern.Length == 0)
    {
        return false;
    }

    int pos = 0;
    while (pos < pattern.Length)
    {
        char current = pattern[pos];
        if (current == '\\')
        {
            // A backslash always consumes the character after it: "\\" is a
            // literal backslash, "\%" / "\_" are literal wildcards, and any
            // other escaped character is just itself. A trailing lone
            // backslash escapes nothing and simply ends the scan.
            pos += 2;
            continue;
        }

        if (current == '%' || current == '_')
        {
            return true;
        }

        pos++;
    }

    return false;
}

/// <summary>
/// Reports whether <paramref name="pattern"/> is exactly the one-character
/// "match anything" pattern, i.e. the string <c>%</c> or the string <c>*</c>.
/// Such patterns let the caller issue SHOW SCHEMAS IN ALL CATALOGS directly
/// instead of enumerating catalogs.
/// </summary>
/// <param name="pattern">The pattern to test; may be null.</param>
/// <returns><c>true</c> only for <c>"%"</c> or <c>"*"</c>; <c>false</c> otherwise, including null.</returns>
internal static bool IsMatchAnything(string? pattern)
{
    switch (pattern)
    {
        case "%":
        case "*":
            return true;
        default:
            // Covers null, empty and every longer pattern (e.g. "%%").
            return false;
    }
}

/// <summary>
/// Compiles a JDBC LIKE pattern into a <see cref="Regex"/> for client-side
/// filtering. <c>%</c> matches any run of characters (including none),
/// <c>_</c> matches exactly one character, and a backslash makes the next
/// character literal (<c>\%</c>, <c>\_</c>, <c>\\</c>). The result is
/// case-sensitive and must match the whole input. Used when wildcard
/// expansion has to happen on the driver side (e.g. catalog patterns on
/// SEA — PECO-3035).
/// </summary>
/// <param name="pattern">The JDBC LIKE pattern to translate.</param>
/// <returns>A regex equivalent to <paramref name="pattern"/>, anchored at both ends.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="pattern"/> is null.</exception>
internal static Regex JdbcLikeToRegex(string pattern)
{
    if (pattern == null)
    {
        throw new System.ArgumentNullException(nameof(pattern));
    }

    var sb = new StringBuilder("^");
    for (int i = 0; i < pattern.Length; i++)
    {
        char c = pattern[i];
        if (c == '\\')
        {
            // The escaped character (another backslash, a wildcard, or
            // anything else) is emitted literally. A trailing lone
            // backslash has nothing to escape and is dropped.
            if (i + 1 < pattern.Length)
            {
                sb.Append(Regex.Escape(pattern[i + 1].ToString()));
                i++;
            }
            continue;
        }

        switch (c)
        {
            case '%': sb.Append(".*"); break;
            case '_': sb.Append("."); break;
            default: sb.Append(Regex.Escape(c.ToString())); break;
        }
    }

    // "\z" anchors at the absolute end of input; "$" would also accept a
    // trailing newline, letting "abc" match "abc\n".
    sb.Append("\\z");

    // Singleline makes "." match '\n' too, so % and _ cover every character
    // the way LIKE wildcards do.
    return new Regex(sb.ToString(), RegexOptions.Singleline);
}
}
}
136 changes: 89 additions & 47 deletions csharp/src/StatementExecution/StatementExecutionConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -712,53 +712,10 @@ async Task<IReadOnlyList<string>> IGetObjectsDataProvider.GetCatalogsAsync(strin

// Lists (catalog, schema) pairs for the IGetObjectsDataProvider contract by
// delegating to ListSchemasAsync with the arguments unchanged.
async Task<IReadOnlyList<(string catalog, string schema)>> IGetObjectsDataProvider.GetSchemasAsync(string? catalogPattern, string? schemaPattern, CancellationToken cancellationToken)
{
    // PECO-3035: catalogPattern follows JDBC LIKE semantics (% / _ / \_ ). The SEA
    // backend treats SHOW SCHEMAS IN `<catalog>` as a literal lookup, so wildcards
    // are resolved client-side by ListSchemasAsync rather than being passed to the
    // backend as a backtick-quoted identifier.
    return await ListSchemasAsync(catalogPattern, schemaPattern, cancellationToken).ConfigureAwait(false);
}

async Task<IReadOnlyList<(string catalog, string schema, string table, string tableType)>> IGetObjectsDataProvider.GetTablesAsync(
Expand Down Expand Up @@ -903,6 +860,91 @@ internal List<RecordBatch> ExecuteMetadataSql(string sql, CancellationToken canc
return ExecuteMetadataSqlAsync(sql, cancellationToken).GetAwaiter().GetResult();
}

/// <summary>
/// Executes SHOW SCHEMAS with JDBC-style catalog pattern semantics. The SEA
/// backend treats <c>SHOW SCHEMAS IN `&lt;catalog&gt;`</c> as a literal identifier
/// lookup — it does not expand <c>%</c> / <c>_</c> wildcards. To match Thrift
/// behaviour (PECO-3035), this helper resolves the catalog pattern client-side:
/// <list type="bullet">
/// <item><description><c>null</c>, <c>"%"</c> or <c>"*"</c> → one <c>SHOW SCHEMAS IN ALL CATALOGS</c> call, returned unfiltered.</description></item>
/// <item><description>A pattern with an unescaped <c>%</c> or <c>_</c> → one <c>SHOW SCHEMAS IN ALL CATALOGS</c> call, then rows are filtered on their catalog value against the pattern.</description></item>
/// <item><description>Anything else → one <c>SHOW SCHEMAS IN `&lt;catalog&gt;`</c> call with the value used as the catalog name.</description></item>
/// </list>
/// </summary>
/// <param name="catalogPattern">JDBC LIKE pattern for the catalog, or null for all catalogs.</param>
/// <param name="schemaPattern">Schema pattern, forwarded unchanged to the SHOW SCHEMAS command.</param>
/// <param name="cancellationToken">Token forwarded to the underlying statement execution.</param>
/// <returns>
/// A flat list of <c>(catalog, schema)</c> pairs in the order the rows were
/// returned; no client-side sorting is applied.
/// </returns>
internal async Task<List<(string catalog, string schema)>> ListSchemasAsync(
    string? catalogPattern, string? schemaPattern, CancellationToken cancellationToken)
{
    // Fast path: nothing to filter, so ask for every catalog in one round-trip.
    if (catalogPattern == null || MetadataCommands.MetadataCommandBase.IsMatchAnything(catalogPattern))
    {
        return await ExecuteShowSchemasAsync(null, schemaPattern, cancellationToken).ConfigureAwait(false);
    }

    // No unescaped wildcard: query that single catalog instead of fetching
    // everything just to throw most of it away.
    // NOTE(review): an escaped literal such as my\_catalog reaches this call with
    // its backslashes intact — confirm ShowSchemasCommand unescapes it.
    if (!MetadataCommands.MetadataCommandBase.ContainsUnescapedWildcard(catalogPattern))
    {
        return await ExecuteShowSchemasAsync(catalogPattern, schemaPattern, cancellationToken).ConfigureAwait(false);
    }

    // Wildcard pattern: fetch all (catalog, schema) pairs once and filter on the
    // catalog client-side, avoiding an N+1 enumerate-then-query-per-catalog loop.
    var candidates = await ExecuteShowSchemasAsync(null, schemaPattern, cancellationToken).ConfigureAwait(false);
    var catalogMatcher = MetadataCommands.MetadataCommandBase.JdbcLikeToRegex(catalogPattern);
    return candidates.FindAll(pair => catalogMatcher.IsMatch(pair.catalog));
}

/// <summary>
/// Runs exactly one SHOW SCHEMAS statement — scoped to <paramref name="catalog"/>,
/// or <c>IN ALL CATALOGS</c> when it is null — and flattens the returned batches
/// into <c>(catalog, schema)</c> pairs. Wildcard catalog patterns must already
/// have been resolved by the caller.
/// </summary>
/// <param name="catalog">Literal catalog name, or null for all catalogs.</param>
/// <param name="schemaPattern">Schema pattern handed to the SHOW SCHEMAS command.</param>
/// <param name="cancellationToken">Token forwarded to the statement execution.</param>
/// <returns>
/// The decoded pairs; an empty list when the backend reports an
/// object-not-found error.
/// </returns>
private async Task<List<(string catalog, string schema)>> ExecuteShowSchemasAsync(
    string? catalog, string? schemaPattern, CancellationToken cancellationToken)
{
    string statement = new ShowSchemasCommand(catalog, schemaPattern).Build();

    List<RecordBatch> recordBatches;
    try
    {
        recordBatches = await ExecuteMetadataSqlAsync(statement, cancellationToken).ConfigureAwait(false);
    }
    catch (DatabricksException ex) when (ex.IsObjectNotFoundException())
    {
        // The requested object does not exist: report "no schemas" rather than failing.
        return new List<(string, string)>();
    }

    // Result shapes:
    //   SHOW SCHEMAS IN ALL CATALOGS -> databaseName, catalog
    //   SHOW SCHEMAS IN `catalog`    -> databaseName
    // Columns are looked up by name so both shapes share one decode loop.
    var pairs = new List<(string, string)>();
    foreach (var recordBatch in recordBatches)
    {
        var schemaNames = TryGetColumn<StringArray>(recordBatch, "databaseName");
        if (schemaNames == null)
        {
            continue;
        }

        // Absent in the single-catalog shape; the parameter stands in for it below.
        var catalogNames = TryGetColumn<StringArray>(recordBatch, "catalog");

        for (int row = 0; row < recordBatch.Length; row++)
        {
            if (schemaNames.IsNull(row))
            {
                continue;
            }

            string catalogName;
            if (catalogNames == null || catalogNames.IsNull(row))
            {
                catalogName = catalog ?? "";
            }
            else
            {
                catalogName = catalogNames.GetString(row);
            }

            pairs.Add((catalogName, schemaNames.GetString(row)));
        }
    }

    return pairs;
}

/// <summary>
/// Executes a SHOW COLUMNS command. When catalog is null, iterates over all catalogs
/// since SHOW COLUMNS IN ALL CATALOGS is not yet supported by the backend.
Expand Down
59 changes: 11 additions & 48 deletions csharp/src/StatementExecution/StatementExecutionStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1076,62 +1076,25 @@ private async Task<QueryResult> GetSchemasAsync(CancellationToken cancellationTo
&& MetadataUtilities.NormalizeSparkCatalog(_metadataCatalogName) != null)
return MetadataSchemaFactory.CreateEmptySchemasResult();

string sql = new ShowSchemasCommand(
// PECO-3035: catalog follows JDBC LIKE semantics. ListSchemasAsync expands
// wildcards client-side (SHOW SCHEMAS IN ALL CATALOGS or per-catalog dispatch)
// and returns a flat list of (catalog, schema) pairs.
var rows = await _connection.ListSchemasAsync(
catalog,
EscapePatternWildcardsInName(_metadataSchemaName)).Build();
activity?.SetTag("sql_query", sql);

List<RecordBatch> batches;
try
{
batches = await _connection.ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);
}
catch (DatabricksException ex) when (ex.IsObjectNotFoundException())
{
activity?.AddEvent("statement.get_schemas.object_not_found", [
new("error", ex.Message)
]);
return MetadataSchemaFactory.CreateEmptySchemasResult();
}

// SHOW SCHEMAS IN ALL CATALOGS returns 2 columns: databaseName, catalog
// SHOW SCHEMAS IN `catalog` returns 1 column: databaseName
bool showAllCatalogs = catalog == null;
EscapePatternWildcardsInName(_metadataSchemaName),
cancellationToken).ConfigureAwait(false);

var tableSchemaBuilder = new StringArray.Builder();
var tableCatalogBuilder = new StringArray.Builder();
int count = 0;
foreach (var batch in batches)
foreach (var (cat, schemaName) in rows)
{
StringArray? catalogArray = null;
StringArray? schemaArray = null;

if (showAllCatalogs)
{
schemaArray = batch.Column(0) as StringArray;
catalogArray = batch.Column(1) as StringArray;
}
else
{
schemaArray = batch.Column(0) as StringArray;
}

if (schemaArray == null) continue;
for (int i = 0; i < batch.Length; i++)
{
if (schemaArray.IsNull(i)) continue;
tableSchemaBuilder.Append(schemaArray.GetString(i));
string catalogValue = catalogArray != null && !catalogArray.IsNull(i)
? catalogArray.GetString(i)
: catalog ?? "";
tableCatalogBuilder.Append(catalogValue);
count++;
}
tableSchemaBuilder.Append(schemaName);
tableCatalogBuilder.Append(cat);
}

activity?.SetTag("result_count", count);
activity?.SetTag("result_count", rows.Count);
var schema = MetadataSchemaFactory.CreateSchemasSchema();
return new QueryResult(count, new HiveInfoArrowStream(schema, new IArrowArray[]
return new QueryResult(rows.Count, new HiveInfoArrowStream(schema, new IArrowArray[]
{
tableSchemaBuilder.Build(), tableCatalogBuilder.Build()
}));
Expand Down
Loading
Loading