From a597301e87a9c54d018f123c6d99130cb340e2c4 Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 4 May 2026 21:49:54 -0400 Subject: [PATCH 1/9] add narwhals deps --- dlt/common/libs/narwhals.py | 20 ++++++++++++++++++++ pyproject.toml | 1 + uv.lock | 3 +++ 3 files changed, 24 insertions(+) create mode 100644 dlt/common/libs/narwhals.py diff --git a/dlt/common/libs/narwhals.py b/dlt/common/libs/narwhals.py new file mode 100644 index 0000000000..390e855870 --- /dev/null +++ b/dlt/common/libs/narwhals.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import narwhals +from narwhals.typing import IntoDataFrame + +if TYPE_CHECKING: + from dlt.common.libs.pyarrow import pyarrow + + +def df_to_arrow(df: IntoDataFrame) -> pyarrow.Table: + """Converts any narwhals-compatible eager or lazy frame to a pyarrow table. + lazy frames are eagerly collected. + """ + nw_df = narwhals.from_native(df, allow_series=False) + if isinstance(nw_df, narwhals.LazyFrame): + nw_df = nw_df.collect() + + return nw_df.to_arrow() diff --git a/pyproject.toml b/pyproject.toml index b6920623ab..e394187b74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ dependencies = [ "sqlglot>=25.4.0", "pywin32>=306 ; sys_platform == 'win32'", "rich-argparse>=1.6.0", + "narwhals>=1.41.0", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 7aba02c767..fcb729eb05 100644 --- a/uv.lock +++ b/uv.lock @@ -2458,6 +2458,8 @@ dependencies = [ { name = "giturlparse" }, { name = "humanize" }, { name = "jsonpath-ng" }, + { name = "narwhals", version = "1.41.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "narwhals", version = "2.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "orjson", version = "3.10.18", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.14' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten') or (python_full_version >= '3.11' and python_full_version < '3.14' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten')" }, { name = "orjson", version = "3.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' or (python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten') or (python_full_version < '3.11' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten')" }, { name = "packaging" }, @@ -2839,6 +2841,7 @@ requires-dist = [ { name = "lancedb", marker = "extra == 'lancedb'", specifier = ">=0.22.0" }, { name = "marimo", marker = "extra == 'workspace'", specifier = ">=0.14.5" }, { name = "mowidgets", marker = "python_full_version >= '3.11' and extra == 'workspace'", specifier = ">=0.2.1" }, + { name = "narwhals", specifier = ">=1.41.0" }, { name = "oracledb", marker = "extra == 'oracle'", specifier = ">=3.4.1" }, { name = "orjson", marker = "python_full_version >= '3.14'", specifier = ">=3.11.0" }, { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", specifier = ">=3.6.7,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1,<4" }, From 1408db54893de888da15798b150428f2f8f75f9b Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 4 May 2026 22:28:03 -0400 Subject: [PATCH 2/9] remove pandas and polars from dlt/extract/incremental/__init__ --- dlt/common/libs/narwhals.py | 11 +++++++++-- dlt/extract/incremental/__init__.py | 5 +++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/dlt/common/libs/narwhals.py b/dlt/common/libs/narwhals.py index 390e855870..ab470555ec 100644 --- a/dlt/common/libs/narwhals.py +++ b/dlt/common/libs/narwhals.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import narwhals from narwhals.typing import IntoDataFrame @@ -9,11 +9,18 @@ from dlt.common.libs.pyarrow import pyarrow +def is_dataframe(obj: Any) -> bool: + maybe_converted = narwhals.from_native(obj, allow_series=False, pass_through=True) + if isinstance(maybe_converted, (narwhals.DataFrame, narwhals.LazyFrame)): + return True + return False + + def df_to_arrow(df: IntoDataFrame) -> pyarrow.Table: """Converts any narwhals-compatible eager or lazy frame to a pyarrow table. lazy frames are eagerly collected. """ - nw_df = narwhals.from_native(df, allow_series=False) + nw_df = narwhals.from_native(df, allow_series=False, pass_through=True) if isinstance(nw_df, narwhals.LazyFrame): nw_df = nw_df.collect() diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 56ff85dcae..2f5a88628f 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -17,7 +17,8 @@ from dlt.common import logger from dlt.common.data_types.typing import TDataType from dlt.common.exceptions import ValueErrorWithKnownValues -from dlt.common.libs import is_arrow_object, is_pandas_frame, is_polars_frame +from dlt.common.libs import is_arrow_object +from dlt.common.libs.narwhals import is_dataframe from dlt.common.jsonpath import compile_path, extract_simple_field_name from dlt.common.typing import ( TDataItem, @@ -644,7 +645,7 @@ def _get_transform(self, items: TDataItems) -> IncrementalTransform: """Gets transform implementation that handles particular data item type""" # assume list is all of the same type for item in items if isinstance(items, list) else [items]: - if is_arrow_object(item) or is_pandas_frame(item) or is_polars_frame(item): + if is_arrow_object(item) or is_dataframe(item): return self._make_or_get_transformer(ArrowIncremental) return self._make_or_get_transformer(JsonIncremental) return self._make_or_get_transformer(JsonIncremental) From a6ad157e74f684d1866df2c5d7393672d315e4b7 Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 4 May 2026 22:37:33 -0400 Subject: [PATCH 3/9] remove pandas and polars from dlt/extract/extractors.py --- dlt/common/libs/narwhals.py | 2 +- dlt/extract/extractors.py | 18 ++---------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/dlt/common/libs/narwhals.py b/dlt/common/libs/narwhals.py index ab470555ec..d5bce9b0a8 100644 --- a/dlt/common/libs/narwhals.py +++ b/dlt/common/libs/narwhals.py @@ -20,7 +20,7 @@ def df_to_arrow(df: IntoDataFrame) -> pyarrow.Table: """Converts any narwhals-compatible eager or lazy frame to a pyarrow table. lazy frames are eagerly collected. """ - nw_df = narwhals.from_native(df, allow_series=False, pass_through=True) + nw_df = narwhals.from_native(df, allow_series=False) if isinstance(nw_df, narwhals.LazyFrame): nw_df = nw_df.collect() diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 3110cdd02b..6cacda7133 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -9,7 +9,7 @@ DestinationCapabilitiesContext, adjust_schema_to_capabilities, ) -from dlt.common.libs import is_pandas_frame, is_polars_frame +from dlt.common.libs.narwhals import df_to_arrow from dlt.common.metrics import DataWriterMetrics from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.typing import TDataItems, TDataItem, TLoaderFileFormat @@ -35,19 +35,6 @@ from dlt.common.libs.pyarrow import pyarrow as pa, TAnyArrowItem -def _to_arrow_table(item: Any) -> Any: - """Convert a pandas or polars frame to a pyarrow Table; pass arrow items through.""" - if is_pandas_frame(item): - from dlt.common.libs.pandas import pandas_to_arrow - - return pandas_to_arrow(item) - if is_polars_frame(item): - from dlt.common.libs.polars import polars_to_arrow - - return polars_to_arrow(item) - return item - - class MaterializedEmptyList(List[Any]): """A list variant that will materialize tables even if empty list was yielded""" @@ -388,8 +375,7 @@ def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> No static_table_name = self._get_static_table_name(resource, meta) items = [ - # 2. remove columns and rows in data contract filters - self._apply_contract_filters(_to_arrow_table(item), resource, static_table_name) + self._apply_contract_filters(df_to_arrow(item), resource, static_table_name) for item in items_list ] super().write_items(resource, items, meta) From b2adf3d733180be4c1631dc9dbbfa24b0cfe2623 Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 4 May 2026 22:39:13 -0400 Subject: [PATCH 4/9] remove pandas and polars from dlt/extract/wrappers.py --- dlt/extract/wrappers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dlt/extract/wrappers.py b/dlt/extract/wrappers.py index 1d53d8be23..d96a0a0dc3 100644 --- a/dlt/extract/wrappers.py +++ b/dlt/extract/wrappers.py @@ -1,6 +1,7 @@ from typing import Any -from dlt.common.libs import is_arrow_object, is_pandas_frame, is_polars_frame +from dlt.common.libs import is_arrow_object +from dlt.common.libs.narwhals import is_dataframe def wrap_additional_type(data: Any) -> Any: @@ -9,7 +10,7 @@ def wrap_additional_type(data: Any) -> Any: if data is None: return data - if is_arrow_object(data) or is_pandas_frame(data) or is_polars_frame(data): + if is_arrow_object(data) or is_dataframe(data): return [data] return data From 7b55df9960b016f210ebb171a3b35a53ee244011 Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 4 May 2026 22:44:08 -0400 Subject: [PATCH 5/9] remove pandas and polars from dlt/extract/utils.py --- dlt/extract/utils.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 938acb820e..2f3bf37369 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -17,15 +17,8 @@ from functools import wraps from dlt.common.data_writers import TDataItemFormat -from dlt.common.libs import ( - get_pandas_module, - get_polars_module, - get_pyarrow_module, - get_pydantic_module, - is_arrow_object, - is_pandas_frame, - is_polars_frame, -) +from dlt.common.libs import get_pydantic_module, is_arrow_object +from dlt.common.libs.narwhals import is_dataframe from dlt.common.reflection.inspect import isgeneratorfunction from dlt.common.schema.typing import TAnySchemaColumns, TTableSchemaColumns from dlt.common.schema.utils import normalize_schema_name @@ -63,14 +56,11 @@ def get_data_item_format(items: TDataItems) -> TDataItemFormat: if isinstance(items, Relation): return "model" - if get_pyarrow_module() is None and get_pandas_module() is None and get_polars_module() is None: - return "object" - # Assume all items in list are the same type try: if isinstance(items, list): items = items[0] - if is_arrow_object(items) or is_pandas_frame(items) or is_polars_frame(items): + if is_arrow_object(items) or is_dataframe(items): return "arrow" except IndexError: pass From 35c803a8a4bdca9f32ac932ddf71f3012c05bc7d Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 4 May 2026 22:57:12 -0400 Subject: [PATCH 6/9] bumped narwhals version --- pyproject.toml | 2 +- uv.lock | 67 ++++++-------------------------------------------- 2 files changed, 9 insertions(+), 60 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e394187b74..8e38da6c33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ dependencies = [ "sqlglot>=25.4.0", "pywin32>=306 ; sys_platform == 'win32'", "rich-argparse>=1.6.0", - "narwhals>=1.41.0", + "narwhals>=2.20.0", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index fcb729eb05..ef764e1832 100644 --- a/uv.lock +++ b/uv.lock @@ -417,8 +417,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "jsonschema", marker = "python_full_version >= '3.10'" }, - { name = "narwhals", version = "1.41.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "narwhals", version = "2.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "narwhals", marker = "python_full_version >= '3.10'" }, { name = "packaging", marker = "python_full_version >= '3.10'" }, { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] @@ -2458,8 +2457,7 @@ dependencies = [ { name = "giturlparse" }, { name = "humanize" }, { name = "jsonpath-ng" }, - { name = "narwhals", version = "1.41.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "narwhals", version = "2.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "narwhals" }, { name = "orjson", version = "3.10.18", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.14' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten') or (python_full_version >= '3.11' and python_full_version < '3.14' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten')" }, { name = "orjson", version = "3.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' or (python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten') or (python_full_version < '3.11' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten')" }, { name = "packaging" }, @@ -2841,7 +2839,7 @@ requires-dist = [ { name = "lancedb", marker = "extra == 'lancedb'", specifier = ">=0.22.0" }, { name = "marimo", marker = "extra == 'workspace'", specifier = ">=0.14.5" }, { name = "mowidgets", marker = "python_full_version >= '3.11' and extra == 'workspace'", specifier = ">=0.2.1" }, - { name = "narwhals", specifier = ">=1.41.0" }, + { name = "narwhals", specifier = ">=2.20.0" }, { name = "oracledb", marker = "extra == 'oracle'", specifier = ">=3.4.1" }, { name = "orjson", marker = "python_full_version >= '3.14'", specifier = ">=3.11.0" }, { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", specifier = ">=3.6.7,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1,<4" }, @@ -5510,7 +5508,7 @@ dependencies = [ { name = "itsdangerous", marker = "python_full_version < '3.11'" }, { name = "jedi", marker = "python_full_version < '3.11'" }, { name = "markdown", marker = "python_full_version < '3.11'" }, - { name = "narwhals", version = "1.41.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "narwhals", marker = "python_full_version < '3.11'" }, { name = "packaging", marker = "python_full_version < '3.11'" }, { name = "psutil", marker = "python_full_version < '3.11'" }, { name = "pygments", marker = "python_full_version < '3.11'" }, @@ -5567,7 +5565,7 @@ dependencies = [ { name = "loro", marker = "python_full_version >= '3.11'" }, { name = "markdown", marker = "python_full_version >= '3.11'" }, { name = "msgspec", marker = "python_full_version >= '3.11'" }, - { name = "narwhals", version = "2.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "narwhals", marker = "python_full_version >= '3.11'" }, { name = "packaging", marker = "python_full_version >= '3.11'" }, { name = "psutil", marker = "python_full_version >= '3.11'" }, { name = "pygments", marker = "python_full_version >= '3.11'" }, @@ -6578,60 +6576,11 @@ wheels = [ [[package]] name = "narwhals" -version = "1.41.0" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.10.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.10.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version < '3.10' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.10.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.10.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version < '3.10' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.10.*' and os_name == 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.10.*' and os_name != 'nt' and sys_platform == 'emscripten'", - "python_full_version < '3.10' and os_name == 'nt' and sys_platform == 'emscripten'", - "python_full_version < '3.10' and os_name != 'nt' and sys_platform == 'emscripten'", -] -sdist = { url = "https://files.pythonhosted.org/packages/32/fc/7b9a3689911662be59889b1b0b40e17d5dba6f98080994d86ca1f3154d41/narwhals-1.41.0.tar.gz", hash = "sha256:0ab2e5a1757a19b071e37ca74b53b0b5426789321d68939738337dfddea629b5", size = 488446, upload-time = "2025-05-26T12:46:07.43Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/e0/ade8619846645461c012498f02b93a659e50f07d9d9a6ffefdf5ea2c02a0/narwhals-1.41.0-py3-none-any.whl", hash = "sha256:d958336b40952e4c4b7aeef259a7074851da0800cf902186a58f2faeff97be02", size = 357968, upload-time = "2025-05-26T12:46:05.207Z" }, -] - -[[package]] -name = "narwhals" -version = "2.17.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version >= '3.14' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version >= '3.14' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version >= '3.14' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version >= '3.14' and os_name == 'nt' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and os_name != 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.13.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.12.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.11.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.12.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.11.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.13.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.13.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.12.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.11.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.12.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.11.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.13.*' and os_name == 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and os_name != 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and os_name == 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and os_name == 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and os_name != 'nt' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and os_name != 'nt' and sys_platform == 'emscripten'", -] -sdist = { url = "https://files.pythonhosted.org/packages/75/59/81d0f4cad21484083466f278e6b392addd9f4205b48d45b5c8771670ebf8/narwhals-2.17.0.tar.gz", hash = "sha256:ebd5bc95bcfa2f8e89a8ac09e2765a63055162837208e67b42d6eeb6651d5e67", size = 620306, upload-time = "2026-02-23T09:44:34.142Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/f3/257adc69a71011b4c8cda321b00f02c5bf1980ae38ffd05a58d9632d4de8/narwhals-2.20.0.tar.gz", hash = "sha256:c10994975fa7dc5a68c2cffcddbd5908fc8ebb2d463c5bab085309c0ee1f551e", size = 627848, upload-time = "2026-04-20T12:11:45.427Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/27/20770bd6bf8fbe1e16f848ba21da9df061f38d2e6483952c29d2bb5d1d8b/narwhals-2.17.0-py3-none-any.whl", hash = "sha256:2ac5307b7c2b275a7d66eeda906b8605e3d7a760951e188dcfff86e8ebe083dd", size = 444897, upload-time = "2026-02-23T09:44:32.006Z" }, + { url = "https://files.pythonhosted.org/packages/d0/69/f24d3d1c38ad69e256138b4ec2452a8c7cf66be49dc214771ae99dd4f0a0/narwhals-2.20.0-py3-none-any.whl", hash = "sha256:16e750ea5507d4ba6e8d03455b5f93a535e0405976561baea235bca5dc9f475d", size = 449373, upload-time = "2026-04-20T12:11:43.596Z" }, ] [[package]] From 61e510fae97595214be8ad4abd27fbc1c887969f Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Tue, 5 May 2026 10:01:50 -0400 Subject: [PATCH 7/9] handle pyarrow recordbatch --- dlt/extract/extractors.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 6cacda7133..7d4fb1f9fc 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -9,6 +9,7 @@ DestinationCapabilitiesContext, adjust_schema_to_capabilities, ) +from dlt.common.libs import is_arrow_object from dlt.common.libs.narwhals import df_to_arrow from dlt.common.metrics import DataWriterMetrics from dlt.common.runtime.collector import Collector, NULL_COLLECTOR @@ -371,13 +372,22 @@ def _retrieve_normalize_config(self) -> ItemsNormalizerConfiguration: ) def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> None: + static_table_name = self._get_static_table_name(resource, meta) + items_list = items if isinstance(items, list) else [items] + items = [] + for item in items_list: + if not is_arrow_object(item): + try: + item = df_to_arrow(item) + except TypeError: + raise TypeError( + f"Received unsupported type `{type(item)}`. Not supported by pyarrow nor" + " narwhals." + ) + + items.append(self._apply_contract_filters(item, resource, static_table_name)) - static_table_name = self._get_static_table_name(resource, meta) - items = [ - self._apply_contract_filters(df_to_arrow(item), resource, static_table_name) - for item in items_list - ] super().write_items(resource, items, meta) def _write_to_static_table( From 5df5b4b1d45305afcb020c49098b38f22109238c Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 11 May 2026 09:15:45 -0400 Subject: [PATCH 8/9] use native narwhals.dependencies.is_into_dataframe() --- dlt/common/libs/narwhals.py | 7 ------- dlt/extract/incremental/__init__.py | 4 ++-- dlt/extract/utils.py | 4 ++-- dlt/extract/wrappers.py | 4 ++-- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/dlt/common/libs/narwhals.py b/dlt/common/libs/narwhals.py index d5bce9b0a8..101f098bd4 100644 --- a/dlt/common/libs/narwhals.py +++ b/dlt/common/libs/narwhals.py @@ -9,13 +9,6 @@ from dlt.common.libs.pyarrow import pyarrow -def is_dataframe(obj: Any) -> bool: - maybe_converted = narwhals.from_native(obj, allow_series=False, pass_through=True) - if isinstance(maybe_converted, (narwhals.DataFrame, narwhals.LazyFrame)): - return True - return False - - def df_to_arrow(df: IntoDataFrame) -> pyarrow.Table: """Converts any narwhals-compatible eager or lazy frame to a pyarrow table. lazy frames are eagerly collected. diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 2f5a88628f..1d33fb73a1 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -18,7 +18,7 @@ from dlt.common.data_types.typing import TDataType from dlt.common.exceptions import ValueErrorWithKnownValues from dlt.common.libs import is_arrow_object -from dlt.common.libs.narwhals import is_dataframe +from dlt.common.libs.narwhals import narwhals from dlt.common.jsonpath import compile_path, extract_simple_field_name from dlt.common.typing import ( TDataItem, @@ -645,7 +645,7 @@ def _get_transform(self, items: TDataItems) -> IncrementalTransform: """Gets transform implementation that handles particular data item type""" # assume list is all of the same type for item in items if isinstance(items, list) else [items]: - if is_arrow_object(item) or is_dataframe(item): + if is_arrow_object(item) or narwhals.dependencies.is_into_dataframe(item): return self._make_or_get_transformer(ArrowIncremental) return self._make_or_get_transformer(JsonIncremental) return self._make_or_get_transformer(JsonIncremental) diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 2f3bf37369..23bdd6d32a 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -18,7 +18,7 @@ from dlt.common.data_writers import TDataItemFormat from dlt.common.libs import get_pydantic_module, is_arrow_object -from dlt.common.libs.narwhals import is_dataframe +from dlt.common.libs.narwhals import narwhals from dlt.common.reflection.inspect import isgeneratorfunction from dlt.common.schema.typing import TAnySchemaColumns, TTableSchemaColumns from dlt.common.schema.utils import normalize_schema_name @@ -60,7 +60,7 @@ def get_data_item_format(items: TDataItems) -> TDataItemFormat: try: if isinstance(items, list): items = items[0] - if is_arrow_object(items) or is_dataframe(items): + if is_arrow_object(items) or narwhals.dependencies.is_into_dataframe(items): return "arrow" except IndexError: pass diff --git a/dlt/extract/wrappers.py b/dlt/extract/wrappers.py index d96a0a0dc3..07c59be33e 100644 --- a/dlt/extract/wrappers.py +++ b/dlt/extract/wrappers.py @@ -1,7 +1,7 @@ from typing import Any from dlt.common.libs import is_arrow_object -from dlt.common.libs.narwhals import is_dataframe +from dlt.common.libs.narwhals import narwhals def wrap_additional_type(data: Any) -> Any: @@ -10,7 +10,7 @@ def wrap_additional_type(data: Any) -> Any: if data is None: return data - if is_arrow_object(data) or is_dataframe(data): + if is_arrow_object(data) or narwhals.dependencies.is_into_dataframe(data): return [data] return data From a62a381b49318e938620172b3e2fd0c0ef69bc58 Mon Sep 17 00:00:00 2001 From: zilto <68975210+zilto@users.noreply.github.com> Date: Mon, 11 May 2026 17:11:53 -0400 Subject: [PATCH 9/9] narwhals typing fix --- dlt/common/libs/narwhals.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dlt/common/libs/narwhals.py b/dlt/common/libs/narwhals.py index 101f098bd4..e3d912f3d6 100644 --- a/dlt/common/libs/narwhals.py +++ b/dlt/common/libs/narwhals.py @@ -1,15 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import narwhals -from narwhals.typing import IntoDataFrame + if TYPE_CHECKING: + from narwhals.typing import IntoFrame + from dlt.common.libs.pyarrow import pyarrow -def df_to_arrow(df: IntoDataFrame) -> pyarrow.Table: +def df_to_arrow(df: IntoFrame) -> pyarrow.Table: """Converts any narwhals-compatible eager or lazy frame to a pyarrow table. lazy frames are eagerly collected. """