From 75fd8e2ff42c2f7e4552d42bfe03cf81ee81cefb Mon Sep 17 00:00:00 2001
From: yangjie01 <yangjie01@baidu.com>
Date: Wed, 17 Jun 2026 22:15:33 +0800
Subject: [PATCH] fix: reject unsupported filter argument in add_columns

The filter parameter was accepted and documented but never used, so any
filter was silently dropped and every fragment was processed. Raise
NotImplementedError instead of silently ignoring it.
---
 lance_ray/io.py                | 13 +++++++++++--
 tests/test_basic_read_write.py | 10 ++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/lance_ray/io.py b/lance_ray/io.py
index 270ab287..cc1dd500 100644
--- a/lance_ray/io.py
+++ b/lance_ray/io.py
@@ -536,9 +536,11 @@ def add_columns(
             the namespace.
         transform: The transform to apply to the dataset. It support a lot of types,
             see `LanceDB API doc https://lancedb.github.io/lance-python-doc/data-evolution.html ` for more details.
-        filter: The filter to apply to the dataset. It is not supported yet, will be
-            supported when `get_fragments` support filter see
+        filter: Not supported yet. Reserved for when `get_fragments` supports a
+            filter, see
             `LanceDB API doc <https://lancedb.github.io/lance-python-doc/all-modules.html#lance.LanceDataset.get_fragments>`_.
+            Passing a non-None value raises ``NotImplementedError`` rather than
+            silently ignoring it.
         read_columns: The columns from the original dataset to read.
         reader_schema: The schema to use for the reader.
         read_version: The version to read.
@@ -555,6 +557,13 @@ def add_columns(
         batch_size: The batch size to use for the reader.
         concurrency: The number of processes to use for the pool.
     """
+    if filter is not None:
+        raise NotImplementedError(
+            "add_columns does not support the 'filter' argument yet; it would be "
+            "silently ignored and every fragment would still be processed. Omit "
+            "'filter' until fragment filtering is supported."
+        )
+
     validate_uri_or_namespace(uri, namespace_impl, table_id)
 
     uri, storage_options = resolve_namespace_table(
diff --git a/tests/test_basic_read_write.py b/tests/test_basic_read_write.py
index 93c96fe9..2625fb19 100644
--- a/tests/test_basic_read_write.py
+++ b/tests/test_basic_read_write.py
@@ -355,6 +355,16 @@ def double_score(x: pa.RecordBatch) -> pa.RecordBatch:
         assert df.columns.tolist() == ["id", "name", "age", "score", "new_column"]
         assert (df["new_column"] == df["score"] * 2).all()
 
+    def test_add_columns_rejects_filter(self, temp_dir):
+        """A non-None filter must raise instead of being silently ignored."""
+        path = Path(temp_dir) / "add_columns_filter.lance"
+        with pytest.raises(NotImplementedError, match="filter"):
+            lr.add_columns(
+                str(path),
+                transform={"new_column": "score * 2"},
+                filter="score > 1",
+            )
+
 
 class TestNamespaceReadWrite:
     """Test cases for read/write with DirectoryNamespace."""