Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions lance_ray/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,9 +536,11 @@ def add_columns(
the namespace.
transform: The transform to apply to the dataset. It support a lot of types,
see `LanceDB API doc https://lancedb.github.io/lance-python-doc/data-evolution.html ` for more details.
filter: The filter to apply to the dataset. It is not supported yet, will be
supported when `get_fragments` support filter see
filter: Not supported yet. Reserved for when `get_fragments` supports a
filter, see
`LanceDB API doc <https://lancedb.github.io/lance-python-doc/all-modules.html#lance.LanceDataset.get_fragments>`_.
Passing a non-None value raises ``NotImplementedError`` rather than
silently ignoring it.
read_columns: The columns from the original dataset to read.
reader_schema: The schema to use for the reader.
read_version: The version to read.
Expand All @@ -555,6 +557,13 @@ def add_columns(
batch_size: The batch size to use for the reader.
concurrency: The number of processes to use for the pool.
"""
if filter is not None:
raise NotImplementedError(
"add_columns does not support the 'filter' argument yet; it would be "
"silently ignored and every fragment would still be processed. Omit "
"'filter' until fragment filtering is supported."
)

validate_uri_or_namespace(uri, namespace_impl, table_id)

uri, storage_options = resolve_namespace_table(
Expand Down
10 changes: 10 additions & 0 deletions tests/test_basic_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,16 @@ def double_score(x: pa.RecordBatch) -> pa.RecordBatch:
assert df.columns.tolist() == ["id", "name", "age", "score", "new_column"]
assert (df["new_column"] == df["score"] * 2).all()

def test_add_columns_rejects_filter(self, temp_dir):
"""A non-None filter must raise instead of being silently ignored."""
path = Path(temp_dir) / "add_columns_filter.lance"
with pytest.raises(NotImplementedError, match="filter"):
lr.add_columns(
str(path),
transform={"new_column": "score * 2"},
filter="score > 1",
)


class TestNamespaceReadWrite:
"""Test cases for read/write with DirectoryNamespace."""
Expand Down
Loading