From 78ba76963fe20445e4881661d7407763fef0c196 Mon Sep 17 00:00:00 2001 From: fanng <fanng@apache.org> Date: Fri, 12 Jun 2026 21:45:00 +0800 Subject: [PATCH 1/2] fix: avoid versioned describe table for namespace opens --- java/src/main/java/org/lance/OpenDatasetBuilder.java | 4 ++-- python/python/lance/__init__.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/org/lance/OpenDatasetBuilder.java b/java/src/main/java/org/lance/OpenDatasetBuilder.java index baece0767a1..32fd5ca7635 100644 --- a/java/src/main/java/org/lance/OpenDatasetBuilder.java +++ b/java/src/main/java/org/lance/OpenDatasetBuilder.java @@ -216,8 +216,8 @@ private Dataset buildFromNamespaceClient() { // Call describe_table to get location and storage options DescribeTableRequest request = new DescribeTableRequest(); request.setId(tableId); - // Only set version if present - options.getVersion().ifPresent(v -> request.setVersion(Long.valueOf(v))); + // Do not set the dataset version here. Some namespace implementations only support describing + // the latest table metadata; the requested version is applied when opening the dataset below. DescribeTableResponse response = namespaceClient.describeTable(request); diff --git a/python/python/lance/__init__.py b/python/python/lance/__init__.py index f58b169a47a..be99eb05cc5 100644 --- a/python/python/lance/__init__.py +++ b/python/python/lance/__init__.py @@ -230,7 +230,9 @@ def dataset( "Both 'namespace_client' and 'table_id' must be provided together." ) - request = DescribeTableRequest(id=table_id, version=version) + # Resolve the latest table metadata here. The requested dataset version is + # applied by the lower-level dataset open path after namespace resolution. 
+ request = DescribeTableRequest(id=table_id, version=None) response = namespace_client.describe_table(request) uri = response.location From 9d237de9dd1adaceff492b1b09d7d12508ca6b9f Mon Sep 17 00:00:00 2001 From: fanng <fanng@apache.org> Date: Mon, 15 Jun 2026 14:32:36 +0800 Subject: [PATCH 2/2] test: cover namespace opens with dataset versions --- .../namespace/DirectoryNamespaceTest.java | 48 +++++++++++++++++++ python/python/tests/test_namespace_dir.py | 43 +++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index f425ddcc4f9..c622bac9fcd 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -189,6 +189,33 @@ void testNamespaceId() { "namespaceId should contain 'DirectoryNamespace', got: " + namespaceId); } + @Test + void testOpenSpecificVersionDoesNotPassVersionToDescribeTable() throws Exception { + VersionRejectingNamespace versionRejectingNamespace = + new VersionRejectingNamespace(innerNamespaceClient); + namespaceClient = versionRejectingNamespace; + List<String> tableId = Arrays.asList("test_table"); + + namespaceClient.createTable(new CreateTableRequest().id(tableId), createTestTableData()); + namespaceClient.insertIntoTable( + new InsertIntoTableRequest().id(tableId).mode("append"), createTestTableData()); + + try (Dataset versionOne = + Dataset.open() + .allocator(allocator) + .namespaceClient(namespaceClient) + .tableId(tableId) + .readOptions(new ReadOptions.Builder().setVersion(1L).build()) + .build()) { + assertEquals(1, versionOne.version()); + assertEquals(3, versionOne.countRows()); + } + + assertTrue( + versionRejectingNamespace.getDescribeTableCallCount() > 0, + "Expected describeTable to be called when opening through namespace"); + } + @Test void testCreateAndListNamespaces() { // Create a namespace @@ 
-1439,4 +1466,25 @@ private byte[] createVectorTableData(int numRows, int dim) throws Exception { return out.toByteArray(); } } + + private static class VersionRejectingNamespace extends CustomNamespace { + private final AtomicInteger describeTableCallCount = new AtomicInteger(); + + VersionRejectingNamespace(DirectoryNamespace inner) { + super(inner); + } + + @Override + public DescribeTableResponse describeTable(DescribeTableRequest request) { + describeTableCallCount.incrementAndGet(); + assertNull( + request.getVersion(), + "Dataset version should be passed to dataset open, not describeTable"); + return super.describeTable(request); + } + + int getDescribeTableCallCount() { + return describeTableCallCount.get(); + } + } } diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index 1991b82946e..b7315f20356 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -979,6 +979,49 @@ def test_external_manifest_store_invokes_namespace_apis(use_custom): ), "describe_table_version should be called once when opening version 1" +def test_dataset_namespace_open_does_not_pass_version_to_describe_table(): + """Dataset versions are applied to dataset open, not namespace describe_table.""" + + class VersionRejectingNamespace(CustomNamespace): + def __init__(self, inner: lance.namespace.DirectoryNamespace): + super().__init__(inner) + self.describe_versions = [] + + def describe_table( + self, request: DescribeTableRequest + ) -> DescribeTableResponse: + self.describe_versions.append(request.version) + assert request.version is None + return super().describe_table(request) + + with tempfile.TemporaryDirectory() as tmpdir: + inner_ns_client = lance.namespace.DirectoryNamespace(root=tmpdir) + ns_client = VersionRejectingNamespace(inner_ns_client) + table_id = ["test_table"] + + table1 = pa.Table.from_pylist([{"a": 1}, {"a": 2}]) + ds = lance.write_dataset( + table1, 
namespace_client=ns_client, table_id=table_id, mode="create" + ) + assert ds.count_rows() == 2 + assert ds.version == 1 + + table2 = pa.Table.from_pylist([{"a": 3}]) + ds = lance.write_dataset( + table2, namespace_client=ns_client, table_id=table_id, mode="append" + ) + assert ds.count_rows() == 3 + assert ds.version == 2 + + version_one = lance.dataset( + namespace_client=ns_client, table_id=table_id, version=1 + ) + assert version_one.count_rows() == 2 + assert version_one.version == 1 + assert ns_client.describe_versions + assert all(version is None for version in ns_client.describe_versions) + + @pytest.mark.skipif( sys.platform == "win32", reason="Windows file locking prevents reliable concurrent filesystem operations",