Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- to_date
- to_datetime
- to_lowercase
- to_time
- to_titlecase
- to_uppercase
- zfill
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- to_date
- to_datetime
- to_lowercase
- to_time
- to_titlecase
- to_uppercase
- zfill
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_arrow/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
extract_native,
lit,
parse_datetime_format,
parse_time_format,
)
from narwhals._compliant.any_namespace import StringNamespace

Expand Down Expand Up @@ -80,6 +81,13 @@ def to_datetime(self, format: str | None) -> ArrowSeries:
def to_date(self, format: str | None) -> ArrowSeries:
    """Convert strings to dates by going through `to_datetime`.

    Arguments:
        format: Forwarded unchanged as the `format` keyword of `to_datetime`.
    """
    parsed = self.to_datetime(format=format)
    return parsed.dt.date()

def to_time(self, format: str | None) -> ArrowSeries:
    """Parse strings with `pc.strptime` and cast the result to the Time dtype.

    Arguments:
        format: Format passed to `pc.strptime`. When None, the format is obtained
            by calling `parse_time_format` on the native array.
    """
    fmt = format if format is not None else parse_time_format(self.native)
    # Parse to microsecond timestamps first; the cast below produces Time.
    parsed = pc.strptime(self.native, format=fmt, unit="us")

    time_dtype = self.version.dtypes.Time()
    wrapped = self.with_native(parsed)
    return wrapped.cast(time_dtype)

def to_uppercase(self) -> ArrowSeries:
    """Return a series wrapping `pc.utf8_upper` applied to the native array."""
    uppercased = pc.utf8_upper(self.native)
    return self.with_native(uppercased)

Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def parse_datetime_format(arr: ChunkedArrayAny) -> str:
raise ValueError(msg)

date_value = _parse_date_format(cast("pc.StringArray", matches.field("date")))
time_value = _parse_time_format(cast("pc.StringArray", matches.field("time")))
time_value = parse_time_format(cast("pc.StringArray", matches.field("time")))

sep_value = separators[0].as_py()
tz_value = "%z" if tz[0].as_py() else ""
Expand Down Expand Up @@ -422,7 +422,7 @@ def _parse_date_format(arr: pc.StringArray) -> str:
raise ValueError(msg)


def _parse_time_format(arr: pc.StringArray) -> str:
def parse_time_format(arr: pc.StringArray) -> str:
for time_rgx, time_fmt in TIME_FORMATS:
matches = pc.extract_regex(arr, pattern=time_rgx)
if pc.all(matches.is_valid()).as_py():
Expand Down
1 change: 1 addition & 0 deletions narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def contains(self, pattern: T, *, literal: bool) -> T: ...
def slice(self, offset: int, length: int | None) -> T: ...
def split(self, by: str) -> T: ...
def to_datetime(self, format: str | None) -> T: ...
def to_time(self, format: str | None) -> T: ...
def to_date(self, format: str | None) -> T: ...
def to_lowercase(self) -> T: ...
def to_titlecase(self) -> T: ...
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,9 @@ def to_datetime(self, format: str | None) -> EagerExprT:
def to_date(self, format: str | None) -> EagerExprT:
    """Delegate to the series-level `str.to_date` via `_reuse_series_namespace`.

    Arguments:
        format: Forwarded as the `format` keyword argument.
    """
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_date", format=format)

def to_time(self, format: str | None) -> EagerExprT:
    """Delegate to the series-level `str.to_time` via `_reuse_series_namespace`.

    Arguments:
        format: Forwarded as the `format` keyword argument.
    """
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_time", format=format)

def to_lowercase(self) -> EagerExprT:
    """Delegate to the series-level `str.to_lowercase` via `_reuse_series_namespace`."""
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_lowercase")

Expand Down
4 changes: 4 additions & 0 deletions narwhals/_dask/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def to_datetime(self, format: str | None) -> DaskExpr:
lambda expr: dd.to_datetime(expr, format=format)
)

def to_time(self, format: str | None) -> DaskExpr:
    """Reject the conversion unconditionally.

    Arguments:
        format: Unused; the method raises before looking at it.

    Raises:
        ValueError: Always, with a message saying the dask backend does not
            support the Time type.
    """
    reason = "dask backend does not support the Time type"
    raise ValueError(reason)

def to_uppercase(self) -> DaskExpr:
    """Build an expression that calls `.str.upper()` on the native expression."""
    compliant = self.compliant
    return compliant._with_callable(lambda expr: expr.str.upper())

Expand Down
9 changes: 9 additions & 0 deletions narwhals/_duckdb/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ def to_date(self, format: str | None) -> DuckDBExpr:
compliant_expr = self.compliant
return compliant_expr.cast(compliant_expr._version.dtypes.Date())

def to_time(self, format: str | None) -> DuckDBExpr:
    """Convert strings to the Time dtype.

    Arguments:
        format: When None, the expression is cast straight to Time. Otherwise
            the strings are first parsed with the `strptime` function using
            this format, and the parsed result is cast to Time.
    """
    compliant = self.compliant
    time_dtype = compliant._version.dtypes.Time()
    if format is not None:
        parsed = compliant._with_elementwise(
            lambda expr: F("strptime", expr, lit(format))
        )
        return parsed.cast(time_dtype)

    return compliant.cast(time_dtype)

@requires.backend_version((1, 2))
def to_titlecase(self) -> DuckDBExpr:
from narwhals._duckdb.utils import lambda_expr
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_ibis/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ def fn(expr: ir.StringColumn) -> ir.DateValue:

return self.compliant._with_callable(fn)

def to_time(self, format: str | None) -> IbisExpr:
    """Convert strings to the Time dtype.

    Arguments:
        format: When None, the expression is cast straight to Time. Otherwise
            the callable returned by `_to_datetime_naive(format)` is applied
            first and its result is cast to Time.
    """
    compliant = self.compliant
    time_dtype = compliant._version.dtypes.Time()
    if format is not None:
        parser = self._to_datetime_naive(format)
        return compliant._with_callable(parser).cast(time_dtype)
    return compliant.cast(time_dtype)

def pad_start(self, length: int, fill_char: str) -> IbisExpr:
def _pad_start(expr: ir.StringColumn) -> ir.Value:
padded = expr.lpad(length, fill_char)
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_pandas_like/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
def to_date(self, format: str | None) -> PandasLikeSeries:
    """Convert strings to dates by going through `to_datetime`.

    Arguments:
        format: Forwarded unchanged as the `format` keyword of `to_datetime`.
    """
    parsed = self.to_datetime(format=format)
    return parsed.dt.date()

def to_time(self, format: str | None) -> PandasLikeSeries:
    """Parse strings with `_to_datetime` and cast the result to the Time dtype.

    Arguments:
        format: Forwarded to `_to_datetime`, which is called with `utc=False`.
    """
    time_dtype = self.version.dtypes.Time()
    parsed = self._to_datetime(format, utc=False)
    wrapped = self.with_native(parsed)
    return wrapped.cast(time_dtype)

def to_uppercase(self) -> PandasLikeSeries:
    """Return a series wrapping `.str.upper()` of the native series."""
    uppercased = self.native.str.upper()
    return self.with_native(uppercased)

Expand Down
1 change: 1 addition & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ def zfill(self, width: int) -> CompliantT: ...
split: Method[CompliantT]
to_date: Method[CompliantT]
to_datetime: Method[CompliantT]
to_time: Method[CompliantT]
to_lowercase: Method[CompliantT]
to_uppercase: Method[CompliantT]
pad_start: Method[CompliantT]
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_spark_like/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def to_date(self, format: str | None) -> SparkLikeExpr:
lambda expr: F.to_date(expr, format=strptime_to_pyspark_format(format))
)

def to_time(self, format: str | None) -> SparkLikeExpr:
    """Reject the conversion unconditionally.

    Arguments:
        format: Unused; the method raises before looking at it.

    Raises:
        ValueError: Always, with a message saying spark-like backends do not
            support the Time type.
    """
    reason = "spark-like backends do not support the Time type"
    raise ValueError(reason)

def to_titlecase(self) -> SparkLikeExpr:
impl = self.compliant._implementation
sqlframe_required_version = (3, 43, 1)
Expand Down
35 changes: 35 additions & 0 deletions narwhals/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,41 @@ def to_date(self, format: str | None = None) -> ExprT:
ExprNode(ExprKind.ELEMENTWISE, "str.to_date", format=format)
)

def to_time(self, format: str | None = None) -> ExprT:
    """Convert to [`narwhals.dtypes.Time`][] dtype.

    Warning:
        Backends auto-infer the format in different ways, so with `format=None`
        the result is not guaranteed to be equal across backends.

    Arguments:
        format: Format to use for conversion. If set to None (default), the
            format is inferred from the data.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame({"a": ["12:59:21", "18:42:12"]})
        >>> df = nw.from_native(df_native)
        >>> df.select(nw.col("a").str.to_time(format="%H:%M:%S"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  shape: (2, 1)   |
        |  ┌──────────┐    |
        |  │ a        │    |
        |  │ ---      │    |
        |  │ time     │    |
        |  ╞══════════╡    |
        |  │ 12:59:21 │    |
        |  │ 18:42:12 │    |
        |  └──────────┘    |
        └──────────────────┘
    """
    node = ExprNode(ExprKind.ELEMENTWISE, "str.to_time", format=format)
    return self._expr._append_node(node)

def to_uppercase(self) -> ExprT:
r"""Transform string to uppercase variant.

Expand Down
30 changes: 30 additions & 0 deletions narwhals/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,36 @@ def to_date(self, format: str | None = None) -> SeriesT:
self._narwhals_series._compliant_series.str.to_date(format=format)
)

def to_time(self, format: str | None = None) -> SeriesT:
    """Convert to [`narwhals.dtypes.Time`][] dtype.

    Warning:
        Backends auto-infer the format in different ways, so with `format=None`
        the result is not guaranteed to be equal across backends.

    Arguments:
        format: Format to use for conversion. If set to None (default), the
            format is inferred from the data.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> s_native = pl.Series(["12:59:21", "18:42:12"])
        >>> s = nw.from_native(s_native, series_only=True)
        >>> s.str.to_time(
        ...     format="%H:%M:%S"
        ... ).to_native()  # doctest: +NORMALIZE_WHITESPACE
        shape: (2,)
        Series: '' [time]
        [
           12:59:21
           18:42:12
        ]
    """
    series = self._narwhals_series
    converted = series._compliant_series.str.to_time(format=format)
    return series._with_compliant(converted)

def to_titlecase(self) -> SeriesT:
"""Modify strings to their titlecase equivalent.

Expand Down
122 changes: 122 additions & 0 deletions tests/expr_and_series/str/to_time_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

import narwhals as nw
from tests.utils import PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION

if TYPE_CHECKING:
from tests.utils import Constructor, ConstructorEager

# One-row input used by `test_to_time` and `test_to_time_series`; the
# format-inference tests shadow this name with their own parametrized `data`.
data = {"a": ["12:34:56"]}


def requires_time_support(
    request: pytest.FixtureRequest, constructor: Constructor | ConstructorEager
) -> None:
    """Skip or xfail the current test when the backend cannot produce Time.

    pandas/modin constructors are skipped when pandas is older than 2.2.0 or
    when pyarrow is unavailable (`PYARROW_VERSION == (0, 0, 0)`). pyspark and
    dask constructors get an `xfail` marker applied to the running test.
    """
    is_pandas_like = constructor.__name__.startswith(("pandas", "modin"))

    if is_pandas_like and PANDAS_VERSION < (2, 2, 0):
        pytest.skip(
            "pandas < 2.2.0 has no pyarrow dtype support (and therefore does not support the Time dtype)"
        )

    if is_pandas_like and PYARROW_VERSION == (0, 0, 0):
        pytest.skip("pandas requires pyarrow for the Time dtype")

    backend = str(constructor)
    if "pyspark" in backend or "dask" in backend:
        unsupported = pytest.mark.xfail(reason="backend does not support Time dtype")
        request.applymarker(unsupported)


def test_to_time(request: pytest.FixtureRequest, constructor: Constructor) -> None:
    """An explicit `%H:%M:%S` format yields a Time column through the lazy API."""
    requires_time_support(request, constructor)

    expected = "12:34:56"

    lazy_frame = nw.from_native(constructor(data)).lazy()
    parsed = nw.col("a").str.to_time(format="%H:%M:%S")
    result = lazy_frame.select(b=parsed).collect()

    assert isinstance(result.collect_schema()["b"], nw.Time)
    assert str(result.item(row=0, column="b")) == expected


def test_to_time_series(
    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
    """An explicit `%H:%M:%S` format yields a Time series on eager backends."""
    requires_time_support(request, constructor_eager)

    # cudf is expected to stringify the value with nanosecond digits.
    if "cudf" in str(constructor_eager):
        expected = "12:34:56.000000000"
    else:
        expected = "12:34:56"

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    result = series.str.to_time(format="%H:%M:%S")

    assert isinstance(result.dtype, nw.Time)
    assert str(result.item(0)) == expected


@pytest.mark.parametrize(
    ("data", "expected"),
    [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")],
)
def test_to_time_infer_fmt(
    request: pytest.FixtureRequest,
    constructor: Constructor,
    data: dict[str, list[str]],
    expected: str,
) -> None:
    """Without a format, `HH:MM:SS` and `HH:MM` inputs are inferred (lazy API)."""
    requires_time_support(request, constructor)

    # Fewer than two colons means the input has no seconds component.
    lacks_seconds = data["a"][0].count(":") < 2
    if (
        "polars" in str(constructor) and POLARS_VERSION < (1, 30) and lacks_seconds
    ):  # pragma: no cover
        old_polars = pytest.mark.xfail(
            reason="Polars<1.30 cannot auto-infer the HH:MM format"
        )
        request.applymarker(old_polars)

    lazy_frame = nw.from_native(constructor(data)).lazy()
    result = lazy_frame.select(b=nw.col("a").str.to_time()).collect()

    assert str(result.item(row=0, column="b")) == expected
    assert isinstance(result.collect_schema()["b"], nw.Time)


@pytest.mark.parametrize(
    ("data", "expected"),
    [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")],
)
def test_to_time_series_infer_fmt(
    request: pytest.FixtureRequest,
    constructor_eager: ConstructorEager,
    data: dict[str, list[str]],
    expected: str,
) -> None:
    """Without a format, `HH:MM:SS` and `HH:MM` inputs are inferred (eager series)."""
    requires_time_support(request, constructor_eager)

    # Fewer than two colons means the input has no seconds component.
    lacks_seconds = data["a"][0].count(":") < 2
    if (
        "polars" in str(constructor_eager)
        and POLARS_VERSION < (1, 30)
        and lacks_seconds
    ):  # pragma: no cover
        old_polars = pytest.mark.xfail(
            reason="Polars<1.30 cannot auto-infer the HH:MM format"
        )
        request.applymarker(old_polars)

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    result = series.str.to_time()

    assert str(result.item(0)) == expected
    assert isinstance(result.dtype, nw.Time)
Loading