Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3b49eff
WIP: Add relaxed versions for all but Dask
FBruzzesi Jan 11, 2026
336f652
WIP: Add unit tests
FBruzzesi Jan 11, 2026
7d4cb37
fixup col name and pyarrow
FBruzzesi Jan 11, 2026
0f17849
minor standardization
FBruzzesi Jan 11, 2026
cebeeda
pandas-like promote_dtype_backend
FBruzzesi Jan 11, 2026
942af32
dask and tests
FBruzzesi Jan 11, 2026
9743d24
add to_supertype coverage
FBruzzesi Jan 11, 2026
1aa2232
Merge branch 'dtypes/supertyping' into feat/supertyping-relaxed-concat
FBruzzesi Jan 11, 2026
3328f2f
skip ibis diagonal
FBruzzesi Jan 11, 2026
23d0cd4
fix(typing): Make `pyright` happier
dangotbanned Jan 11, 2026
e0ce9eb
fix(typing): Pacify `mypy` for `pandas_like`
dangotbanned Jan 11, 2026
4b484f4
wow that was a useless error message!
dangotbanned Jan 11, 2026
683c835
fix(typing): Tell `mypy` we have a wider type than the first assignment
dangotbanned Jan 11, 2026
5bffccd
perf: Avoid unnecessary `lambda`s
dangotbanned Jan 11, 2026
8fabb13
perf: Use a generator instead of intermediate `dict`
dangotbanned Jan 12, 2026
c658320
perf: Optimize, rename `promote_dtype_backends`
dangotbanned Jan 14, 2026
909f06b
ibis diagonal relaxed
FBruzzesi Jan 20, 2026
c832b72
combine_schemas -> merge_schemas
FBruzzesi Jan 20, 2026
41f8679
merge head
FBruzzesi Jan 30, 2026
4a1b946
preserve unknown for lazy backends
FBruzzesi Jan 30, 2026
cdae5c8
merge head and solve conflicts
FBruzzesi Jan 31, 2026
b205ddb
preserve original dtype if not supported by narwhals
FBruzzesi Jan 31, 2026
61205dd
require pyarrow 19
FBruzzesi Jan 31, 2026
b3576dc
add reason
FBruzzesi Jan 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
combine_evaluate_output_names,
)
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import Implementation
from narwhals._utils import Implementation, safe_cast
from narwhals.schema import Schema, merge_schemas, to_supertype

if TYPE_CHECKING:
Expand Down Expand Up @@ -97,10 +97,7 @@ def concat(
schemas: Iterable[Schema] = (Schema(df.collect_schema()) for df in items)
out_schema = reduce(to_supertype, schemas)
native_items = (
item.select(
*(self.col(name).cast(dtype) for name, dtype in out_schema.items())
).native
for item in items
item.select(*safe_cast(self, out_schema)).native for item in items
)
res = reduce(DuckDBPyRelation.union, native_items)
return first._with_native(res)
Expand All @@ -117,22 +114,20 @@ def concat(
if how == "diagonal_relaxed":
schemas = [Schema(df.collect_schema()) for df in items]
out_schema = reduce(merge_schemas, schemas)
res, *others = (
native_items = (
item.select(
*(
self.col(name).cast(dtype)
self.col(name)
if name in schema
else self.lit(None, dtype=dtype).alias(name)
for name, dtype in out_schema.items()
)
).native
)
.select(*safe_cast(self, out_schema))
.native
for item, schema in zip(items, schemas)
)
for _item in others:
# TODO(unassigned): use relational API when available https://github.com/duckdb/duckdb/discussions/16996
res = duckdb.sql("""
from res select * union all by name from _item select *
""")
res = reduce(DuckDBPyRelation.union, native_items)
return first._with_native(res)
raise NotImplementedError

Expand Down
9 changes: 3 additions & 6 deletions narwhals/_ibis/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from narwhals._ibis.selectors import IbisSelectorNamespace
from narwhals._ibis.utils import function, lit, narwhals_to_native_dtype
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import Implementation
from narwhals._utils import Implementation, safe_cast
from narwhals.schema import Schema, to_supertype

if TYPE_CHECKING:
Expand Down Expand Up @@ -74,11 +74,8 @@ def concat(

if how.endswith("relaxed"):
schemas = (Schema(frame.collect_schema()) for frame in frames)
out_schema = reduce(to_supertype, schemas).items()
frames = [
frame.select(*(self.col(name).cast(dtype) for name, dtype in out_schema))
for frame in frames
]
out_schema = reduce(to_supertype, schemas)
frames = [frame.select(*safe_cast(self, out_schema)) for frame in frames]
try:
result = ibis.union(*(lf.native for lf in frames))
except ibis.IbisError:
Expand Down
14 changes: 14 additions & 0 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,20 @@ def rename(
return cast("NativeNDFrameT", result) # type: ignore[redundant-cast]


@lru_cache(maxsize=16)
def is_dtype_non_pyarrow_string(native_dtype: Any) -> bool:
    """Check whether `native_dtype` is one of the string dtypes recognised here.

    *There is no problem which can't be solved by adding an extra string type* pandas.

    Arguments:
        native_dtype: The native dtype object to inspect. It must be hashable,
            since results are memoised with `lru_cache`.

    Returns:
        `True` when `native_dtype` is a `pd.StringDtype` instance, or when
        `str(native_dtype)` equals one of the names listed below; `False` otherwise.
    """
    # TODO @dangotbanned: Investigate how we could handle `cudf` without `str(native_dtype)`
    # https://github.com/rapidsai/cudf/blob/a32b8cf62c9b086b645b0825b78b99f065b1887f/python/cudf/cudf/utils/dtypes.py#L646-L670
    return isinstance(native_dtype, pd.StringDtype) or str(native_dtype) in {
        "string",
        "string[python]",
        "string[pyarrow_numpy]",
        "<StringDtype(na_value=nan)>",  # why? why? why?
        "str",
    }


@lru_cache(maxsize=16)
def non_object_native_to_narwhals_dtype(native_dtype: Any, version: Version) -> DType: # noqa: C901, PLR0912
dtype = str(native_dtype)
Expand Down
12 changes: 6 additions & 6 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
true_divide,
)
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import safe_cast
from narwhals.schema import Schema, merge_schemas, to_supertype

if TYPE_CHECKING:
Expand Down Expand Up @@ -181,10 +182,7 @@ def concat(
schemas: Iterable[Schema] = (Schema(df.collect_schema()) for df in items)
out_schema = reduce(to_supertype, schemas)
native_items = (
item.select(
*(self.col(name).cast(dtype) for name, dtype in out_schema.items())
).native
for item in items
item.select(*safe_cast(self, out_schema)).native for item in items
)
union = items[0].native.__class__.union
return SparkLikeLazyFrame(
Expand All @@ -199,12 +197,14 @@ def concat(
native_items = (
item.select(
*(
self.col(name).cast(dtype)
self.col(name)
if name in schema
else self.lit(None, dtype=dtype).alias(name)
for name, dtype in out_schema.items()
)
).native
)
.select(*safe_cast(self, out_schema))
.native
for item, schema in zip(items, schemas)
)
union = items[0].native.__class__.union
Expand Down
20 changes: 19 additions & 1 deletion narwhals/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,14 @@
TypeIs,
)

from narwhals._compliant import CompliantExprT, CompliantSeriesT, NativeSeriesT_co
from narwhals import dtypes
from narwhals._compliant import (
CompliantExprT,
CompliantFrameT,
CompliantNamespace,
CompliantSeriesT,
NativeSeriesT_co,
)
from narwhals._compliant.any_namespace import NamespaceAccessor
from narwhals._compliant.typing import (
Accessor,
Expand Down Expand Up @@ -2148,3 +2155,14 @@ def __repr__(self) -> str: # pragma: no cover

# Can be imported from types in Python 3.10
NoneType = type(None)


def safe_cast(
    ns: CompliantNamespace[CompliantFrameT, CompliantExprT],
    mapping: Mapping[str, dtypes.DType],
) -> Iterable[CompliantExprT]:
    """Build one column expression per entry of `mapping`, casting where possible.

    Arguments:
        ns: Namespace used to create column expressions (via `ns.col`) and to
            look up the `Unknown` dtype for its version.
        mapping: Column names mapped to the dtype each column should end up with.

    Returns:
        A lazily evaluated iterable of expressions, in `mapping` order. A column
        whose target dtype compares equal to `Unknown` is selected as-is; every
        other column is cast to its target dtype.
    """
    # NOTE(review): a reviewer left a suggestion on the line below in the PR
    # ("I would: ..."); its content is not available in this capture.
    unknown = ns._version.dtypes.Unknown()
    return (
        ns.col(name) if dtype == unknown else ns.col(name).cast(dtype)
        for name, dtype in mapping.items()
    )
4 changes: 2 additions & 2 deletions tests/frame/schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,11 +744,11 @@ def test_to_supertype(left: IntoSchema, right: IntoSchema, expected: IntoSchema)
pytest.raises(ComputeError, match="schema names differ: got b, expected a"),
),
(
{"a": nw.String()},
{"a": nw.Binary()},
{"a": nw.Int64()},
pytest.raises(
SchemaMismatchError,
match="failed to determine supertype of String and Int64",
match="failed to determine supertype of Binary and Int64",
),
),
],
Expand Down
Loading