Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions csharp/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
<Project>
<PropertyGroup>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
<CentralPackageTransitivePinningEnabled>true</CentralPackageTransitivePinningEnabled>
</PropertyGroup>
<ItemGroup>
<!-- Compression -->
Expand All @@ -33,6 +34,7 @@
<PackageVersion Include="BenchmarkDotNet" Version="0.14.0" />
<!-- System -->
<PackageVersion Include="System.Collections.Immutable" Version="8.0.0" />
<PackageVersion Include="System.Memory" Version="4.6.3" />
<PackageVersion Include="System.Reflection.Metadata" Version="8.0.0" />
<PackageVersion Include="System.Net.Http" Version="4.3.4" />
<!-- Resilience -->
Expand All @@ -44,5 +46,7 @@
<!-- Protobuf (for telemetry schema testing) -->
<PackageVersion Include="Google.Protobuf" Version="3.25.1" />
<PackageVersion Include="Grpc.Tools" Version="2.60.0" />
<!-- Telemetry -->
<PackageVersion Include="OpenTelemetry.Api" Version="1.15.3" />
</ItemGroup>
</Project>
16 changes: 14 additions & 2 deletions csharp/src/AdbcDrivers.Databricks.csproj
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks Condition="'$(IsWindows)'=='true'">netstandard2.0;net472;net8.0</TargetFrameworks>
<TargetFrameworks Condition="'$(TargetFrameworks)'==''">netstandard2.0;net8.0</TargetFrameworks>
<TargetFrameworks Condition="'$(IsWindows)'=='true'">netstandard2.0;net472;net8.0;net10.0</TargetFrameworks>
<TargetFrameworks Condition="'$(TargetFrameworks)'==''">netstandard2.0;net8.0;net10.0</TargetFrameworks>
<PackageReadmeFile>readme.md</PackageReadmeFile>
</PropertyGroup>

<!--
Verify the driver is AOT/trim-safe under net10.0 without yet producing a
native build. IsAotCompatible turns on the trim, single-file, and AOT
Roslyn analyzers (and marks the assembly IsTrimmable) so a normal build
surfaces IL2xxx/IL3xxx warnings; with TreatWarningsAsErrors they fail the
build. The actual NativeAOT publish will live in a separate project.
-->
<PropertyGroup Condition="'$(TargetFramework)'=='net10.0'">
<IsAotCompatible>true</IsAotCompatible>
<EnableNETAnalyzers>true</EnableNETAnalyzers>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="K4os.Compression.LZ4" />
<PackageReference Include="K4os.Compression.LZ4.Streams" />
Expand Down
169 changes: 129 additions & 40 deletions csharp/src/ComplexTypeSerializingStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,17 @@

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Threading;
using System.Threading.Tasks;
using AdbcDrivers.Databricks.StatementExecution;
using Apache.Arrow;
using Apache.Arrow.Adbc.Extensions;
using Apache.Arrow.Ipc;
using Apache.Arrow.Types;
using AdbcDrivers.Databricks.StatementExecution;

namespace AdbcDrivers.Databricks
{
Expand Down Expand Up @@ -75,7 +76,7 @@ internal sealed class ComplexTypeSerializingStream : IArrowArrayStream
// a double quote becomes \" (not \u0022) and non-ASCII / < > & are emitted verbatim
// rather than \uXXXX-escaped. The output is still valid JSON; "unsafe" only refers to
// embedding directly in HTML, which is the consuming application's concern, not ours.
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
private static readonly JsonWriterOptions WriterOptions = new JsonWriterOptions
{
Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
};
Expand Down Expand Up @@ -139,12 +140,27 @@ private RecordBatch ConvertComplexColumns(RecordBatch batch)
private static StringArray SerializeToStringArray(IArrowArray array)
{
StringArray.Builder builder = new StringArray.Builder();

// Write each value with a manual Utf8JsonWriter rather than JsonSerializer.Serialize.
// The value graph walked by SerializeStructuredValue is a closed set of Arrow scalar
// types, lists, maps, and structs, so we can emit it without reflection — keeping this
// trim- and NativeAOT-safe. A single stream/writer pair is reused across rows to avoid
// per-row allocations.
using MemoryStream stream = new MemoryStream();
using Utf8JsonWriter writer = new Utf8JsonWriter(stream, WriterOptions);
for (int i = 0; i < array.Length; i++)
{
if (array.IsNull(i))
{
builder.AppendNull();
else
builder.Append(JsonSerializer.Serialize(ToObject(array, i), JsonOptions));
continue;
}

stream.SetLength(0);
writer.Reset(stream);
SerializeStructuredValue(writer, array, i);
writer.Flush();
builder.Append(Encoding.UTF8.GetString(stream.GetBuffer(), 0, (int)stream.Length));
}
return builder.Build();
}
Expand Down Expand Up @@ -175,53 +191,123 @@ private static HashSet<int> DetectComplexColumns(Schema schema)

// --- JSON serialization helpers ---

private static object? ToObject(IArrowArray array, int index)
private static void SerializeStructuredValue(Utf8JsonWriter writer, IArrowArray array, int index)
{
if (array.IsNull(index))
return null;
{
writer.WriteNullValue();
return;
}

// Handle complex types with recursive traversal, and types needing specific
// string formatting. All other primitives delegate to ValueAt().
return array switch
switch (array.Data.DataType.TypeId)
{
ListArray la => ToListOrMap(la, index),
StructArray sa => ToDict(sa, index),
case ArrowTypeId.List:
case ArrowTypeId.Map: SerializeListOrMap(writer, (ListArray)array, index); break;
case ArrowTypeId.Struct: SerializeDict(writer, (StructArray)array, index); break;
// DECIMAL: emit as a bare JSON number (not a quoted string) so the output matches
// the JDBC driver and is valid JSON. The decimal's string form is written raw so
// values beyond C# decimal's ~28-digit range (DECIMAL(38, …)) keep full precision.
Decimal128Array dec => RawNumber(dec.GetString(index)),
Decimal256Array dec => RawNumber(dec.GetString(index)),
Date32Array d32 => d32.GetDateTime(index)?.ToString("yyyy-MM-dd"),
case ArrowTypeId.Decimal32: writer.WriteRawValue(((Decimal32Array)array).GetString(index)); break;
case ArrowTypeId.Decimal64: writer.WriteRawValue(((Decimal64Array)array).GetString(index)); break;
case ArrowTypeId.Decimal128: writer.WriteRawValue(((Decimal128Array)array).GetString(index)); break;
case ArrowTypeId.Decimal256: writer.WriteRawValue(((Decimal256Array)array).GetString(index)); break;
case ArrowTypeId.Date32: writer.WriteStringValue(((Date32Array)array).GetDateTime(index)!.Value.ToString("yyyy-MM-dd")); break;
case ArrowTypeId.Date64: writer.WriteStringValue(((Date64Array)array).GetDateTime(index)!.Value.ToString("yyyy-MM-dd")); break;
// INTERVAL: native YearMonth/Duration arrays serialize to {} via System.Text.Json
// (no public properties). Render the same "Y-M" / "D HH:MM:SS.nnnnnnnnn" strings
// IntervalSerializingStream produces for top-level interval columns.
YearMonthIntervalArray ym => IntervalSerializingStream.FormatYearMonth(ym.GetValue(index)!.Value.Months),
DurationArray dur => IntervalSerializingStream.FormatDuration(dur.GetValue(index)!.Value, ((DurationType)dur.Data.DataType).Unit),
_ => array.ValueAt(index, StructResultType.Object) // int, long, double, float, bool, string, timestamp, etc.
};
}
case ArrowTypeId.Interval:
switch (((IntervalType)array.Data.DataType).Unit)
{
case IntervalUnit.DayTime:
var dayTime = ((DayTimeIntervalArray)array).GetValue(index)!.Value;
var timeSpan = TimeSpan.FromDays(dayTime.Days) + TimeSpan.FromMilliseconds(dayTime.Milliseconds);
writer.WriteStringValue(timeSpan.ToString());
break;
case IntervalUnit.MonthDayNanosecond:
var monthDayNano = ((MonthDayNanosecondIntervalArray)array).GetValue(index)!.Value;
timeSpan = TimeSpan.FromDays(monthDayNano.Days) + TimeSpan.FromTicks(monthDayNano.Nanoseconds / 100);
writer.WriteStringValue(IntervalSerializingStream.FormatYearMonth(monthDayNano.Months) + " " + timeSpan.ToString());
break;
case IntervalUnit.YearMonth:
writer.WriteStringValue(IntervalSerializingStream.FormatYearMonth(
((YearMonthIntervalArray)array).GetValue(index)!.Value.Months));
break;
default: writer.WriteNullValue(); break;
}
break;
case ArrowTypeId.Duration:
DurationArray dur = (DurationArray)array;
writer.WriteStringValue(IntervalSerializingStream.FormatDuration(dur.GetValue(index)!.Value, ((DurationType)dur.Data.DataType).Unit));
break;

/// <summary>
/// Wraps a numeric string as a raw JSON number node so <see cref="JsonSerializer"/> emits it
/// unquoted (e.g. <c>1</c>, not <c>"1"</c>) with full precision.
/// </summary>
private static JsonNode? RawNumber(string? numericText) =>
numericText == null ? null : JsonNode.Parse(numericText);
case ArrowTypeId.Boolean: writer.WriteBooleanValue(((BooleanArray)array).GetValue(index)!.Value); break;
case ArrowTypeId.Double: writer.WriteNumberValue(((DoubleArray)array).GetValue(index)!.Value); break;
case ArrowTypeId.Float: writer.WriteNumberValue(((FloatArray)array).GetValue(index)!.Value); break;
#if NET5_0_OR_GREATER
case ArrowTypeId.HalfFloat:
var halfValue = ((HalfFloatArray)array).GetValue(index)!.Value;
writer.WriteNumberValue((double)halfValue); break;
#endif
case ArrowTypeId.Int8: writer.WriteNumberValue(((Int8Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.Int16: writer.WriteNumberValue(((Int16Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.Int32: writer.WriteNumberValue(((Int32Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.Int64: writer.WriteNumberValue(((Int64Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.String: writer.WriteStringValue(((StringArray)array).GetString(index)); break;
case ArrowTypeId.LargeString: writer.WriteStringValue(((LargeStringArray)array).GetString(index)); break;
#if NET6_0_OR_GREATER
case ArrowTypeId.Time32: writer.WriteRawValue(((Time32Array)array).GetTime(index)!.Value.ToString("HH:mm:ss.ffffff")); break;
case ArrowTypeId.Time64: writer.WriteRawValue(((Time64Array)array).GetTime(index)!.Value.ToString("HH:mm:ss.ffffff")); break;
Comment thread
CurtHagenlocher marked this conversation as resolved.
Outdated
#else
case ArrowTypeId.Time32:
Time32Array time32Array = (Time32Array)array;
int time32 = time32Array.GetValue(index)!.Value;
switch (((Time32Type)time32Array.Data.DataType).Unit)
{
case TimeUnit.Second: writer.WriteStringValue(TimeSpan.FromSeconds(time32).ToString()); break;
case TimeUnit.Millisecond: writer.WriteStringValue(TimeSpan.FromMilliseconds(time32).ToString()); break;
default: writer.WriteNullValue(); break;
};
break;
case ArrowTypeId.Time64:
Time64Array time64Array = (Time64Array)array;
long time64 = time64Array.GetValue(index)!.Value;
switch (((Time64Type)time64Array.Data.DataType).Unit)
{
case TimeUnit.Microsecond: writer.WriteStringValue(TimeSpan.FromTicks(time64 * 10).ToString()); break;
case TimeUnit.Nanosecond: writer.WriteStringValue(TimeSpan.FromTicks(time64 / 100).ToString()); break;
default: writer.WriteNullValue(); break;
};
break;
#endif
case ArrowTypeId.Timestamp: writer.WriteStringValue(((TimestampArray)array).GetTimestamp(index)!.Value); break;
case ArrowTypeId.UInt8: writer.WriteNumberValue(((UInt8Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.UInt16: writer.WriteNumberValue(((UInt16Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.UInt32: writer.WriteNumberValue(((UInt32Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.UInt64: writer.WriteNumberValue(((UInt64Array)array).GetValue(index)!.Value); break;
case ArrowTypeId.Binary: writer.WriteBase64StringValue(((BinaryArray)array).GetBytes(index)); break;
default: writer.WriteNullValue(); break;
}
}

private static object ToListOrMap(ListArray listArray, int index)
private static void SerializeListOrMap(Utf8JsonWriter writer, ListArray listArray, int index)
{
IArrowArray values = listArray.Values;
int start = (int)listArray.ValueOffsets[index];
int end = (int)listArray.ValueOffsets[index + 1];

// Arrow MAP is stored as List<Struct<key, value>>
if (values is StructArray structValues && IsMapStruct(structValues))
return ToMapDict(structValues, start, end);
{
SerializeMapDict(writer, structValues, start, end);
return;
}

writer.WriteStartArray();
List<object?> list = new List<object?>();
for (int i = start; i < end; i++)
list.Add(ToObject(values, i));
return list;
SerializeStructuredValue(writer, values, i);
writer.WriteEndArray();
}

private static bool IsMapStruct(StructArray structArray)
Expand All @@ -232,28 +318,31 @@ private static bool IsMapStruct(StructArray structArray)
type.Fields[1].Name == "value";
}

private static SortedDictionary<string, object?> ToMapDict(StructArray entries, int start, int end)
private static void SerializeMapDict(Utf8JsonWriter writer, StructArray entries, int start, int end)
{
IArrowArray keyArray = entries.Fields[0];
IArrowArray valueArray = entries.Fields[1];
// Use SortedDictionary for deterministic key ordering in the JSON output
SortedDictionary<string, object?> result = new SortedDictionary<string, object?>();
writer.WriteStartObject();
for (int i = start; i < end; i++)
{
// Convert any key type to its string representation; treat null keys as "null"
string key = ToObject(keyArray, i)?.ToString() ?? "null";
result[key] = ToObject(valueArray, i);
string key = keyArray.ValueAt(i)?.ToString() ?? "null";
writer.WritePropertyName(key);
SerializeStructuredValue(writer, valueArray, i);
}
return result;
writer.WriteEndObject();
}

private static Dictionary<string, object?> ToDict(StructArray structArray, int index)
private static void SerializeDict(Utf8JsonWriter writer, StructArray structArray, int index)
{
StructType type = (StructType)structArray.Data.DataType;
Dictionary<string, object?> dict = new Dictionary<string, object?>();
writer.WriteStartObject();
for (int i = 0; i < type.Fields.Count; i++)
dict[type.Fields[i].Name] = ToObject(structArray.Fields[i], index);
return dict;
{
writer.WritePropertyName(type.Fields[i].Name);
SerializeStructuredValue(writer, structArray.Fields[i], index);
}
writer.WriteEndObject();
}
}
}
38 changes: 38 additions & 0 deletions csharp/src/DatabricksConfigJsonContext.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2026 ADBC Drivers Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace AdbcDrivers.Databricks
{
/// <summary>
/// Source-generation context that supplies <c>System.Text.Json</c> metadata for the
/// free-form configuration file. The file is read as one flat
/// <see cref="Dictionary{TKey,TValue}"/> of string-to-string, which is the only type
/// registered on this context.
/// </summary>
/// <remarks>
/// Deserializing through the generated context keeps the operation trim- and
/// NativeAOT-safe. The generation options mirror the prior runtime options: trailing
/// commas allowed, comments skipped, and case-insensitive property names. The class body
/// is intentionally empty; it is declared <c>partial</c> so the source generator can
/// supply the implementation.
/// </remarks>
[JsonSerializable(typeof(Dictionary<string, string>))]
[JsonSourceGenerationOptions(
    AllowTrailingCommas = true,
    ReadCommentHandling = JsonCommentHandling.Skip,
    PropertyNameCaseInsensitive = true)]
internal partial class DatabricksConfigJsonContext : JsonSerializerContext
{
}
}
12 changes: 4 additions & 8 deletions csharp/src/DatabricksConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,11 @@ public static DatabricksConfiguration FromFile(string filePath)
try
{
string json = File.ReadAllText(filePath);
var options = new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true,
ReadCommentHandling = JsonCommentHandling.Skip,
AllowTrailingCommas = true
};

// Deserialize as flat dictionary (free-form JSON)
var properties = JsonSerializer.Deserialize<Dictionary<string, string>>(json, options);
// Deserialize as flat dictionary (free-form JSON). The source-generated context
// (case-insensitive, comments skipped, trailing commas allowed) keeps this
// trim- and NativeAOT-safe.
var properties = JsonSerializer.Deserialize(json, DatabricksConfigJsonContext.Default.DictionaryStringString);
if (properties == null)
{
throw new InvalidOperationException($"Failed to deserialize configuration from {filePath}");
Expand Down
2 changes: 1 addition & 1 deletion csharp/src/DatabricksStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ protected override async Task<QueryResult> GetColumnsExtendedAsync(CancellationT
{
throw new FormatException($"Invalid json result of {query}: result is null or empty");
}
var result = JsonSerializer.Deserialize<DescTableExtendedResult>(resultJson!);
var result = JsonSerializer.Deserialize(resultJson!, DescTableJsonContext.Default.DescTableExtendedResult);
if (result == null)
{
throw new FormatException($"Invalid json result of {query}.Result={resultJson}");
Expand Down
Loading
Loading