Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- to_date
- to_datetime
- to_lowercase
- to_time
- to_titlecase
- to_uppercase
- zfill
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- to_date
- to_datetime
- to_lowercase
- to_time
- to_titlecase
- to_uppercase
- zfill
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_arrow/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
extract_native,
lit,
parse_datetime_format,
parse_time_format,
)
from narwhals._compliant.any_namespace import StringNamespace

Expand Down Expand Up @@ -80,6 +81,13 @@ def to_datetime(self, format: str | None) -> ArrowSeries:
def to_date(self, format: str | None) -> ArrowSeries:
    """Parse the strings as datetimes and keep only their date component.

    Arguments:
        format: Format forwarded unchanged to `to_datetime`; may be None.
    """
    as_datetime = self.to_datetime(format=format)
    return as_datetime.dt.date()

def to_time(self, format: str | None) -> ArrowSeries:
    """Parse the strings into a series of the narwhals `Time` dtype.

    Arguments:
        format: strptime-style format. When None, the format is obtained from
            `parse_time_format` applied to the native array.
    """
    if format is None:
        format = parse_time_format(self.native)

    # `pc.strptime` yields timestamps (microsecond unit); the cast below turns
    # them into time-of-day values.
    parsed = pc.strptime(self.native, format=format, unit="us")
    time_dtype = self.version.dtypes.Time()
    wrapped = self.with_native(parsed)
    return wrapped.cast(time_dtype)

def to_uppercase(self) -> ArrowSeries:
    """Return a new series holding `pc.utf8_upper` of the native array."""
    uppercased = pc.utf8_upper(self.native)
    return self.with_native(uppercased)

Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def parse_datetime_format(arr: ChunkedArrayAny) -> str:
raise ValueError(msg)

date_value = _parse_date_format(cast("pc.StringArray", matches.field("date")))
time_value = _parse_time_format(cast("pc.StringArray", matches.field("time")))
time_value = parse_time_format(cast("pc.StringArray", matches.field("time")))

sep_value = separators[0].as_py()
tz_value = "%z" if tz[0].as_py() else ""
Expand Down Expand Up @@ -422,7 +422,7 @@ def _parse_date_format(arr: pc.StringArray) -> str:
raise ValueError(msg)


def _parse_time_format(arr: pc.StringArray) -> str:
def parse_time_format(arr: pc.StringArray) -> str:
for time_rgx, time_fmt in TIME_FORMATS:
matches = pc.extract_regex(arr, pattern=time_rgx)
if pc.all(matches.is_valid()).as_py():
Expand Down
1 change: 1 addition & 0 deletions narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def contains(self, pattern: T, *, literal: bool) -> T: ...
def slice(self, offset: int, length: int | None) -> T: ...
def split(self, by: str) -> T: ...
def to_datetime(self, format: str | None) -> T: ...
def to_time(self, format: str | None) -> T: ...
def to_date(self, format: str | None) -> T: ...
def to_lowercase(self) -> T: ...
def to_titlecase(self) -> T: ...
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,9 @@ def to_datetime(self, format: str | None) -> EagerExprT:
def to_date(self, format: str | None) -> EagerExprT:
    """Delegate to the series-level `str.to_date`, forwarding `format`."""
    reuse = self.compliant._reuse_series_namespace
    return reuse("str", "to_date", format=format)

def to_time(self, format: str | None) -> EagerExprT:
    """Delegate to the series-level `str.to_time`, forwarding `format`."""
    reuse = self.compliant._reuse_series_namespace
    return reuse("str", "to_time", format=format)

def to_lowercase(self) -> EagerExprT:
    """Delegate to the series-level `str.to_lowercase`."""
    reuse = self.compliant._reuse_series_namespace
    return reuse("str", "to_lowercase")

Expand Down
4 changes: 4 additions & 0 deletions narwhals/_dask/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def to_datetime(self, format: str | None) -> DaskExpr:
lambda expr: dd.to_datetime(expr, format=format)
)

def to_time(self, format: str | None) -> DaskExpr:
    """Always fail: this backend reports no support for the Time type.

    Arguments:
        format: Accepted for interface compatibility; never used.

    Raises:
        ValueError: Unconditionally.
    """
    msg = "dask backend does not support the Time type"
    raise ValueError(msg)

def to_uppercase(self) -> DaskExpr:
    """Build an expression that calls `.str.upper()` on the native expression."""
    with_callable = self.compliant._with_callable
    return with_callable(lambda expr: expr.str.upper())

Expand Down
9 changes: 9 additions & 0 deletions narwhals/_duckdb/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ def to_date(self, format: str | None) -> DuckDBExpr:
compliant_expr = self.compliant
return compliant_expr.cast(compliant_expr._version.dtypes.Date())

def to_time(self, format: str | None) -> DuckDBExpr:
    """Convert the string expression to the narwhals `Time` dtype.

    Arguments:
        format: When None, the expression is cast to `Time` directly.
            Otherwise the values first go through the `strptime` function
            with `format` as a literal, and the result is cast to `Time`.
    """
    compliant = self.compliant
    time_dtype = compliant._version.dtypes.Time()
    if format is None:
        parsed = compliant
    else:
        parsed = compliant._with_elementwise(
            lambda expr: F("strptime", expr, lit(format))
        )
    return parsed.cast(time_dtype)

@requires.backend_version((1, 2))
def to_titlecase(self) -> DuckDBExpr:
from narwhals._duckdb.utils import lambda_expr
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_ibis/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ def fn(expr: ir.StringColumn) -> ir.DateValue:

return self.compliant._with_callable(fn)

def to_time(self, format: str | None) -> IbisExpr:
    """Convert the string expression to the narwhals `Time` dtype.

    Arguments:
        format: When None, the expression is cast to `Time` directly.
            Otherwise the callable returned by `_to_datetime_naive(format)`
            is applied first, and its result is cast to `Time`.
    """
    compliant = self.compliant
    time_dtype = compliant._version.dtypes.Time()
    if format is None:
        parsed = compliant
    else:
        parsed = compliant._with_callable(self._to_datetime_naive(format))
    return parsed.cast(time_dtype)

def pad_start(self, length: int, fill_char: str) -> IbisExpr:
def _pad_start(expr: ir.StringColumn) -> ir.Value:
padded = expr.lpad(length, fill_char)
Expand Down
12 changes: 12 additions & 0 deletions narwhals/_pandas_like/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,18 @@ def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
def to_date(self, format: str | None) -> PandasLikeSeries:
    """Parse the strings as datetimes and keep only their date component.

    Arguments:
        format: Format forwarded unchanged to `to_datetime`; may be None.
    """
    as_datetime = self.to_datetime(format=format)
    return as_datetime.dt.date()

def to_time(self, format: str | None) -> PandasLikeSeries:
if not is_dtype_pyarrow(self.native.dtype):
msg = (
"This operation requires a pyarrow-backed series. "
"Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes "
"and ensure you are using dtype_backend='pyarrow'. "
"Additionally, make sure you have pandas version 1.5+ and pyarrow installed. "
)
raise TypeError(msg)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we follow the same "pattern" that we use when converting from any pandas-like dtype to one that is supported only with pyarrow instead of directly requiring that the dtype is already pyarrow-backed?

I refer to:

def narwhals_to_native_arrow_dtype(
dtype: IntoDType, implementation: Implementation, version: Version
) -> pd.ArrowDtype:
if is_pandas_or_modin(implementation) and PANDAS_VERSION >= (2, 2):
try:
import pyarrow as pa # ignore-banned-import # noqa: F401
except ImportError as exc: # pragma: no cover
msg = (
f"Unable to convert to {dtype} due to the following exception: {exc.msg}"
)
raise ImportError(msg) from exc
from narwhals._arrow.utils import narwhals_to_native_dtype as _to_arrow_dtype
return pd.ArrowDtype(_to_arrow_dtype(dtype, version))
msg = ( # pragma: no cover

Copy link
Copy Markdown
Member Author

@camriddell camriddell Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was following the pattern in _pandas_like...str.split.

Shall I change both this instance and the one in my PR to follow the reference in _pandas_like.utils.narwhals_to_native_arrow_dtype?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing that out! I guess it's some inconsistency!
I would be OK with casting on behalf of the user in these cases, but I'm happy to hear what others think about this.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, I let the mechanics of .cast(Time) handle the auto-conversion after stepping through the datetime dtype.

I also added an xfail for testing environments that are running the non-pyarrow pandas-like constructors but do not have pyarrow installed/available.

time_dtype = self.version.dtypes.Time()
return self.with_native(self._to_datetime(format, utc=False)).cast(time_dtype)

def to_uppercase(self) -> PandasLikeSeries:
    """Return a new series holding `.str.upper()` of the native series."""
    uppercased = self.native.str.upper()
    return self.with_native(uppercased)

Expand Down
1 change: 1 addition & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ def zfill(self, width: int) -> CompliantT: ...
split: Method[CompliantT]
to_date: Method[CompliantT]
to_datetime: Method[CompliantT]
to_time: Method[CompliantT]
to_lowercase: Method[CompliantT]
to_uppercase: Method[CompliantT]
pad_start: Method[CompliantT]
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_spark_like/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def to_date(self, format: str | None) -> SparkLikeExpr:
lambda expr: F.to_date(expr, format=strptime_to_pyspark_format(format))
)

def to_time(self, format: str | None) -> SparkLikeExpr:
    """Always fail: these backends report no support for the Time type.

    Arguments:
        format: Accepted for interface compatibility; never used.

    Raises:
        ValueError: Unconditionally.
    """
    msg = "spark-like backends do not support the Time type"
    raise ValueError(msg)

def to_titlecase(self) -> SparkLikeExpr:
impl = self.compliant._implementation
sqlframe_required_version = (3, 43, 1)
Expand Down
35 changes: 35 additions & 0 deletions narwhals/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,41 @@ def to_date(self, format: str | None = None) -> ExprT:
ExprNode(ExprKind.ELEMENTWISE, "str.to_date", format=format)
)

def to_time(self, format: str | None = None) -> ExprT:
    """Convert to Time dtype.

    The Dask and spark-like backends do not support the Time type and raise
    a `ValueError` for this operation.

    Warning:
        As different backends auto-infer format in different ways, if `format=None`
        there is no guarantee that the result will be equal.

    Arguments:
        format: Format to use for conversion. If set to None (default), the format is
            inferred from the data.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame({"a": ["12:59:21", "18:42:12"]})
        >>> df = nw.from_native(df_native)
        >>> df.select(nw.col("a").str.to_time(format="%H:%M:%S"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  shape: (2, 1)   |
        |   ┌──────────┐   |
        |   │ a        │   |
        |   │ ---      │   |
        |   │ time     │   |
        |   ╞══════════╡   |
        |   │ 12:59:21 │   |
        |   │ 18:42:12 │   |
        |   └──────────┘   |
        └──────────────────┘
    """
    node = ExprNode(ExprKind.ELEMENTWISE, "str.to_time", format=format)
    return self._expr._append_node(node)

def to_uppercase(self) -> ExprT:
r"""Transform string to uppercase variant.

Expand Down
30 changes: 30 additions & 0 deletions narwhals/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,36 @@ def to_date(self, format: str | None = None) -> SeriesT:
self._narwhals_series._compliant_series.str.to_date(format=format)
)

def to_time(self, format: str | None = None) -> SeriesT:
    """Convert to Time dtype.

    Warning:
        As different backends auto-infer format in different ways, if `format=None`
        there is no guarantee that the result will be equal.

    Arguments:
        format: Format to use for conversion. If set to None (default), the format is
            inferred from the data.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> s_native = pl.Series(["12:59:21", "18:42:12"])
        >>> s = nw.from_native(s_native, series_only=True)
        >>> s.str.to_time(
        ...     format="%H:%M:%S"
        ... ).to_native()  # doctest: +NORMALIZE_WHITESPACE
        shape: (2,)
        Series: '' [time]
        [
            12:59:21
            18:42:12
        ]
    """
    series = self._narwhals_series
    converted = series._compliant_series.str.to_time(format=format)
    return series._with_compliant(converted)

def to_titlecase(self) -> SeriesT:
"""Modify strings to their titlecase equivalent.

Expand Down
113 changes: 113 additions & 0 deletions tests/expr_and_series/str/to_time_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

import narwhals as nw
from tests.utils import POLARS_VERSION

if TYPE_CHECKING:
from tests.utils import Constructor, ConstructorEager

data = {"a": ["12:34:56"]}


def test_to_time(request: pytest.FixtureRequest, constructor: Constructor) -> None:
    """Check `Expr.str.to_time` with an explicit format across constructors."""
    backend = str(constructor)
    pandas_without_pyarrow = "pandas" in backend and "pyarrow" not in backend
    # These constructors are expected to fail, so mark them xfail up front.
    if pandas_without_pyarrow or "pyspark" in backend or "dask" in backend:
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data)).lazy()
    result = frame.select(b=nw.col("a").str.to_time(format="%H:%M:%S"))

    # The schema must report Time before anything is collected.
    schema = result.collect_schema()
    assert isinstance(schema["b"], nw.Time)

    value = result.collect().item(row=0, column="b")
    assert str(value) == "12:34:56"


def test_to_time_series(
    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
    """Check `Series.str.to_time` with an explicit format across eager constructors."""
    backend = str(constructor_eager)
    pandas_without_pyarrow = "pandas" in backend and "pyarrow" not in backend
    # These constructors are expected to fail, so mark them xfail up front.
    if pandas_without_pyarrow or "pyspark" in backend or "dask" in backend:
        request.applymarker(pytest.mark.xfail)

    # cudf is expected to stringify the value with nanosecond digits.
    if "cudf" in backend:
        expected = "12:34:56.000000000"
    else:
        expected = "12:34:56"

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    parsed = series.str.to_time(format="%H:%M:%S")
    assert str(parsed.item(0)) == expected


@pytest.mark.parametrize(
    ("data", "expected"),
    [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")],
)
def test_to_time_infer_fmt(
    request: pytest.FixtureRequest,
    constructor: Constructor,
    data: dict[str, list[str]],
    expected: str,
) -> None:
    """Check `Expr.str.to_time` with format inference (`format=None`)."""
    backend = str(constructor)
    # Polars older than 1.30 is expected to fail on input with fewer than
    # two ":" separators (e.g. "12:34").
    old_polars_short_input = (
        "polars" in backend
        and POLARS_VERSION < (1, 30)
        and data["a"][0].count(":") < 2
    )
    pandas_without_pyarrow = "pandas" in backend and "pyarrow" not in backend
    if (
        old_polars_short_input
        or pandas_without_pyarrow
        or "pyspark" in backend
        or "dask" in backend
    ):
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data)).lazy()
    collected = frame.select(b=nw.col("a").str.to_time()).collect()
    value = collected.item(row=0, column="b")
    assert str(value) == expected


@pytest.mark.parametrize(
    ("data", "expected"),
    [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")],
)
def test_to_time_series_infer_fmt(
    request: pytest.FixtureRequest,
    constructor_eager: ConstructorEager,
    data: dict[str, list[str]],
    expected: str,
) -> None:
    """Check `Series.str.to_time` with format inference (`format=None`)."""
    backend = str(constructor_eager)
    # Polars older than 1.30 is expected to fail on input with fewer than
    # two ":" separators (e.g. "12:34").
    old_polars_short_input = (
        "polars" in backend
        and POLARS_VERSION < (1, 30)
        and data["a"][0].count(":") < 2
    )
    pandas_without_pyarrow = "pandas" in backend and "pyarrow" not in backend
    if old_polars_short_input or pandas_without_pyarrow or "pyspark" in backend:
        request.applymarker(pytest.mark.xfail)

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    parsed = series.str.to_time()
    assert str(parsed.item(0)) == expected
Loading