Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions java/src/main/java/org/lance/OpenDatasetBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ private Dataset buildFromNamespaceClient() {
// Call describe_table to get location and storage options
DescribeTableRequest request = new DescribeTableRequest();
request.setId(tableId);
// Only set version if present
options.getVersion().ifPresent(v -> request.setVersion(Long.valueOf(v)));
// Do not set the dataset version here. Some namespace implementations only support describing
// the latest table metadata; the requested version is applied when opening the dataset below.

DescribeTableResponse response = namespaceClient.describeTable(request);

Expand Down
48 changes: 48 additions & 0 deletions java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,33 @@ void testNamespaceId() {
"namespaceId should contain 'DirectoryNamespace', got: " + namespaceId);
}

@Test
void testOpenSpecificVersionDoesNotPassVersionToDescribeTable() throws Exception {
VersionRejectingNamespace versionRejectingNamespace =
new VersionRejectingNamespace(innerNamespaceClient);
namespaceClient = versionRejectingNamespace;
List<String> tableId = Arrays.asList("test_table");

namespaceClient.createTable(new CreateTableRequest().id(tableId), createTestTableData());
namespaceClient.insertIntoTable(
new InsertIntoTableRequest().id(tableId).mode("append"), createTestTableData());

try (Dataset versionOne =
Dataset.open()
.allocator(allocator)
.namespaceClient(namespaceClient)
.tableId(tableId)
.readOptions(new ReadOptions.Builder().setVersion(1L).build())
.build()) {
assertEquals(1, versionOne.version());
assertEquals(3, versionOne.countRows());
}

assertTrue(
versionRejectingNamespace.getDescribeTableCallCount() > 0,
"Expected describeTable to be called when opening through namespace");
}

@Test
void testCreateAndListNamespaces() {
// Create a namespace
Expand Down Expand Up @@ -1439,4 +1466,25 @@ private byte[] createVectorTableData(int numRows, int dim) throws Exception {
return out.toByteArray();
}
}

private static class VersionRejectingNamespace extends CustomNamespace {
private final AtomicInteger describeTableCallCount = new AtomicInteger();

VersionRejectingNamespace(DirectoryNamespace inner) {
super(inner);
}

@Override
public DescribeTableResponse describeTable(DescribeTableRequest request) {
describeTableCallCount.incrementAndGet();
assertNull(
request.getVersion(),
"Dataset version should be passed to dataset open, not describeTable");
return super.describeTable(request);
}

int getDescribeTableCallCount() {
return describeTableCallCount.get();
}
}
}
4 changes: 3 additions & 1 deletion python/python/lance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,9 @@ def dataset(
"Both 'namespace_client' and 'table_id' must be provided together."
)

request = DescribeTableRequest(id=table_id, version=version)
# Resolve the latest table metadata here. The requested dataset version is
# applied by the lower-level dataset open path after namespace resolution.
request = DescribeTableRequest(id=table_id, version=None)
response = namespace_client.describe_table(request)

uri = response.location
Expand Down
43 changes: 43 additions & 0 deletions python/python/tests/test_namespace_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,49 @@ def test_external_manifest_store_invokes_namespace_apis(use_custom):
), "describe_table_version should be called once when opening version 1"


def test_dataset_namespace_open_does_not_pass_version_to_describe_table():
"""Dataset versions are applied to dataset open, not namespace describe_table."""

class VersionRejectingNamespace(CustomNamespace):
def __init__(self, inner: lance.namespace.DirectoryNamespace):
super().__init__(inner)
self.describe_versions = []

def describe_table(
self, request: DescribeTableRequest
) -> DescribeTableResponse:
self.describe_versions.append(request.version)
assert request.version is None
return super().describe_table(request)

with tempfile.TemporaryDirectory() as tmpdir:
inner_ns_client = lance.namespace.DirectoryNamespace(root=tmpdir)
ns_client = VersionRejectingNamespace(inner_ns_client)
table_id = ["test_table"]

table1 = pa.Table.from_pylist([{"a": 1}, {"a": 2}])
ds = lance.write_dataset(
table1, namespace_client=ns_client, table_id=table_id, mode="create"
)
assert ds.count_rows() == 2
assert ds.version == 1

table2 = pa.Table.from_pylist([{"a": 3}])
ds = lance.write_dataset(
table2, namespace_client=ns_client, table_id=table_id, mode="append"
)
assert ds.count_rows() == 3
assert ds.version == 2

version_one = lance.dataset(
namespace_client=ns_client, table_id=table_id, version=1
)
assert version_one.count_rows() == 2
assert version_one.version == 1
assert ns_client.describe_versions
assert all(version is None for version in ns_client.describe_versions)


@pytest.mark.skipif(
sys.platform == "win32",
reason="Windows file locking prevents reliable concurrent filesystem operations",
Expand Down
Loading