Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- to_date
- to_datetime
- to_lowercase
- to_time
- to_titlecase
- to_uppercase
- zfill
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- to_date
- to_datetime
- to_lowercase
- to_time
- to_titlecase
- to_uppercase
- zfill
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_arrow/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
extract_native,
lit,
parse_datetime_format,
parse_time_format,
)
from narwhals._compliant.any_namespace import StringNamespace

Expand Down Expand Up @@ -80,6 +81,13 @@ def to_datetime(self, format: str | None) -> ArrowSeries:
def to_date(self, format: str | None) -> ArrowSeries:
    """Parse the strings via `to_datetime`, then keep only the date component.

    Arguments:
        format: Format forwarded unchanged to `to_datetime` (may be None).
    """
    as_datetime = self.to_datetime(format=format)
    return as_datetime.dt.date()

def to_time(self, format: str | None) -> ArrowSeries:
    """Parse the strings with `pc.strptime` and cast the result to Time.

    Arguments:
        format: Format handed to `pc.strptime`. When None, it is obtained
            from `parse_time_format` applied to the native array.
    """
    fmt = format if format is not None else parse_time_format(self.native)
    parsed = pc.strptime(self.native, format=fmt, unit="us")

    # The parsed values are wrapped first and only then cast to the Time dtype.
    target_dtype = self.version.dtypes.Time()
    return self.with_native(parsed).cast(target_dtype)

def to_uppercase(self) -> ArrowSeries:
    """Wrap the result of `pc.utf8_upper` on the native array in a new series."""
    uppercased = pc.utf8_upper(self.native)
    return self.with_native(uppercased)

Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def parse_datetime_format(arr: ChunkedArrayAny) -> str:
raise ValueError(msg)

date_value = _parse_date_format(cast("pc.StringArray", matches.field("date")))
time_value = _parse_time_format(cast("pc.StringArray", matches.field("time")))
time_value = parse_time_format(cast("pc.StringArray", matches.field("time")))

sep_value = separators[0].as_py()
tz_value = "%z" if tz[0].as_py() else ""
Expand Down Expand Up @@ -422,7 +422,7 @@ def _parse_date_format(arr: pc.StringArray) -> str:
raise ValueError(msg)


def _parse_time_format(arr: pc.StringArray) -> str:
def parse_time_format(arr: pc.StringArray) -> str:
for time_rgx, time_fmt in TIME_FORMATS:
matches = pc.extract_regex(arr, pattern=time_rgx)
if pc.all(matches.is_valid()).as_py():
Expand Down
1 change: 1 addition & 0 deletions narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def contains(self, pattern: T, *, literal: bool) -> T: ...
def slice(self, offset: int, length: int | None) -> T: ...
def split(self, by: str) -> T: ...
def to_datetime(self, format: str | None) -> T: ...
def to_time(self, format: str | None) -> T: ...
def to_date(self, format: str | None) -> T: ...
def to_lowercase(self) -> T: ...
def to_titlecase(self) -> T: ...
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,9 @@ def to_datetime(self, format: str | None) -> EagerExprT:
def to_date(self, format: str | None) -> EagerExprT:
    """Reuse the series-level `str.to_date`, forwarding `format` as a keyword."""
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_date", format=format)

def to_time(self, format: str | None) -> EagerExprT:
    """Reuse the series-level `str.to_time`, forwarding `format` as a keyword."""
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_time", format=format)

def to_lowercase(self) -> EagerExprT:
    """Reuse the series-level `str.to_lowercase`."""
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_lowercase")

Expand Down
4 changes: 4 additions & 0 deletions narwhals/_dask/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def to_datetime(self, format: str | None) -> DaskExpr:
lambda expr: dd.to_datetime(expr, format=format)
)

def to_time(self, format: str | None) -> DaskExpr:
    """Reject the conversion unconditionally.

    Arguments:
        format: Unused; present only to match the shared `to_time` signature.

    Raises:
        ValueError: Always, regardless of `format`.
    """
    message = "dask backend does not support the Time type"
    raise ValueError(message)

def to_uppercase(self) -> DaskExpr:
    """Build a new expression that applies `.str.upper()` to the native expression."""

    def _upper(expr):
        return expr.str.upper()

    return self.compliant._with_callable(_upper)

Expand Down
9 changes: 9 additions & 0 deletions narwhals/_duckdb/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ def to_date(self, format: str | None) -> DuckDBExpr:
compliant_expr = self.compliant
return compliant_expr.cast(compliant_expr._version.dtypes.Date())

def to_time(self, format: str | None) -> DuckDBExpr:
    """Convert to the Time dtype, going through `strptime` when a format is given.

    Arguments:
        format: When None, the expression is cast to Time directly. Otherwise
            the `strptime` function is applied with `format` as a literal and
            its result is cast to Time.
    """
    compliant = self.compliant
    time_dtype = compliant._version.dtypes.Time()

    if format is not None:

        def _strptime(expr):
            return F("strptime", expr, lit(format))

        return compliant._with_elementwise(_strptime).cast(time_dtype)

    return compliant.cast(time_dtype)

@requires.backend_version((1, 2))
def to_titlecase(self) -> DuckDBExpr:
from narwhals._duckdb.utils import lambda_expr
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_ibis/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ def fn(expr: ir.StringColumn) -> ir.DateValue:

return self.compliant._with_callable(fn)

def to_time(self, format: str | None) -> IbisExpr:
    """Convert to the Time dtype, parsing with `_to_datetime_naive` when a format is given.

    Arguments:
        format: When None, the expression is cast to Time directly. Otherwise
            the callable returned by `_to_datetime_naive(format)` is applied
            first and its result is cast to Time.
    """
    compliant = self.compliant
    time_dtype = compliant._version.dtypes.Time()

    if format is None:
        return compliant.cast(time_dtype)

    parsed = compliant._with_callable(self._to_datetime_naive(format))
    return parsed.cast(time_dtype)

def pad_start(self, length: int, fill_char: str) -> IbisExpr:
def _pad_start(expr: ir.StringColumn) -> ir.Value:
padded = expr.lpad(length, fill_char)
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_pandas_like/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
def to_date(self, format: str | None) -> PandasLikeSeries:
    """Parse the strings via `to_datetime`, then keep only the date component.

    Arguments:
        format: Format forwarded unchanged to `to_datetime` (may be None).
    """
    as_datetime = self.to_datetime(format=format)
    return as_datetime.dt.date()

def to_time(self, format: str | None) -> PandasLikeSeries:
    """Parse the strings with `_to_datetime(..., utc=False)` and cast to Time.

    Arguments:
        format: Format forwarded unchanged to `_to_datetime` (may be None).
    """
    target_dtype = self.version.dtypes.Time()
    parsed = self._to_datetime(format, utc=False)
    return self.with_native(parsed).cast(target_dtype)

def to_uppercase(self) -> PandasLikeSeries:
    """Wrap the result of `.str.upper()` on the native series in a new series."""
    uppercased = self.native.str.upper()
    return self.with_native(uppercased)

Expand Down
1 change: 1 addition & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ def zfill(self, width: int) -> CompliantT: ...
split: Method[CompliantT]
to_date: Method[CompliantT]
to_datetime: Method[CompliantT]
to_time: Method[CompliantT]
to_lowercase: Method[CompliantT]
to_uppercase: Method[CompliantT]
pad_start: Method[CompliantT]
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_spark_like/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def to_date(self, format: str | None) -> SparkLikeExpr:
lambda expr: F.to_date(expr, format=strptime_to_pyspark_format(format))
)

def to_time(self, format: str | None) -> SparkLikeExpr:
    """Reject the conversion unconditionally.

    Arguments:
        format: Unused; present only to match the shared `to_time` signature.

    Raises:
        ValueError: Always, regardless of `format`.
    """
    message = "spark-like backends do not support the Time type"
    raise ValueError(message)

def to_titlecase(self) -> SparkLikeExpr:
impl = self.compliant._implementation
sqlframe_required_version = (3, 43, 1)
Expand Down
35 changes: 35 additions & 0 deletions narwhals/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,41 @@ def to_date(self, format: str | None = None) -> ExprT:
ExprNode(ExprKind.ELEMENTWISE, "str.to_date", format=format)
)

def to_time(self, format: str | None = None) -> ExprT:
    """Parse string values into the [`narwhals.dtypes.Time`][] dtype.

    Warning:
        Backends auto-infer the format in different ways, so with `format=None`
        there is no guarantee that the result will be equal across backends.

    Arguments:
        format: Format to use for the conversion. When left as None (default),
            the format is inferred from the data.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame({"a": ["12:59:21", "18:42:12"]})
        >>> df = nw.from_native(df_native)
        >>> df.select(nw.col("a").str.to_time(format="%H:%M:%S"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  shape: (2, 1)   |
        |  ┌──────────┐    |
        |  │ a        │    |
        |  │ ---      │    |
        |  │ time     │    |
        |  ╞══════════╡    |
        |  │ 12:59:21 │    |
        |  │ 18:42:12 │    |
        |  └──────────┘    |
        └──────────────────┘
    """
    node = ExprNode(ExprKind.ELEMENTWISE, "str.to_time", format=format)
    return self._expr._append_node(node)

def to_uppercase(self) -> ExprT:
r"""Transform string to uppercase variant.

Expand Down
30 changes: 30 additions & 0 deletions narwhals/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,36 @@ def to_date(self, format: str | None = None) -> SeriesT:
self._narwhals_series._compliant_series.str.to_date(format=format)
)

def to_time(self, format: str | None = None) -> SeriesT:
    """Parse string values into the [`narwhals.dtypes.Time`][] dtype.

    Warning:
        Backends auto-infer the format in different ways, so with `format=None`
        there is no guarantee that the result will be equal across backends.

    Arguments:
        format: Format to use for the conversion. When left as None (default),
            the format is inferred from the data.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> s_native = pl.Series(["12:59:21", "18:42:12"])
        >>> s = nw.from_native(s_native, series_only=True)
        >>> s.str.to_time(
        ...     format="%H:%M:%S"
        ... ).to_native()  # doctest: +NORMALIZE_WHITESPACE
        shape: (2,)
        Series: '' [time]
        [
           12:59:21
           18:42:12
        ]
    """
    series = self._narwhals_series
    converted = series._compliant_series.str.to_time(format=format)
    return series._with_compliant(converted)

def to_titlecase(self) -> SeriesT:
"""Modify strings to their titlecase equivalent.

Expand Down
129 changes: 129 additions & 0 deletions tests/expr_and_series/str/to_time_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from __future__ import annotations

from importlib.util import find_spec
from typing import TYPE_CHECKING

import pytest

import narwhals as nw
from tests.utils import PANDAS_VERSION, POLARS_VERSION

if TYPE_CHECKING:
from tests.utils import Constructor, ConstructorEager

data = {"a": ["12:34:56"]}


def is_pandaslike_without_pyarrow(constructor: Constructor | ConstructorEager) -> bool:
    """Return True for a non-pyarrow pandas-like constructor when pyarrow is not importable.

    Test environments that have pandas but not pyarrow available should xfail
    the `.str.to_time` tests.

    pandas does not natively support the Time datatype. As such, Narwhals
    attempts to automatically convert pandas series to a pyarrow-backed pandas
    series if pyarrow is available.

    The decision is based on `constructor.__name__`: it must start with
    "pandas" or "modin" and must not contain "pyarrow".
    """
    name = constructor.__name__
    if not name.startswith(("pandas", "modin")):
        return False
    if "pyarrow" in name:
        return False
    # Only now probe the environment: the answer depends on pyarrow being importable.
    return find_spec("pyarrow") is None


@pytest.mark.skipif(PANDAS_VERSION < (2, 2, 0), reason="pyarrow dtype not available")
Comment thread
FBruzzesi marked this conversation as resolved.
Outdated
def test_to_time(request: pytest.FixtureRequest, constructor: Constructor) -> None:
    """Check `str.to_time` with an explicit format through the lazy expression API.

    Marked xfail for pandas-like constructors without pyarrow and for
    pyspark/dask constructors.
    """
    constructor_id = str(constructor)
    if is_pandaslike_without_pyarrow(constructor) or any(
        backend in constructor_id for backend in ("pyspark", "dask")
    ):
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data)).lazy()
    result = frame.select(b=nw.col("a").str.to_time(format="%H:%M:%S")).collect()

    assert isinstance(result.collect_schema()["b"], nw.Time)
    assert str(result.item(row=0, column="b")) == "12:34:56"


@pytest.mark.skipif(PANDAS_VERSION < (2, 2, 0), reason="pyarrow dtype not available")
def test_to_time_series(
    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
    """Check `Series.str.to_time` with an explicit format on eager backends.

    Marked xfail for pandas-like constructors without pyarrow and for
    pyspark/dask constructors. cudf is expected to render the value with
    nanosecond digits.
    """
    constructor_id = str(constructor_eager)
    if is_pandaslike_without_pyarrow(constructor_eager) or any(
        backend in constructor_id for backend in ("pyspark", "dask")
    ):
        request.applymarker(pytest.mark.xfail)

    if "cudf" in constructor_id:
        expected = "12:34:56.000000000"
    else:
        expected = "12:34:56"

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    result = series.str.to_time(format="%H:%M:%S")

    assert isinstance(result.dtype, nw.Time)
    assert str(result.item(0)) == expected


@pytest.mark.parametrize(
    ("data", "expected"),
    [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")],
)
@pytest.mark.skipif(PANDAS_VERSION < (2, 2, 0), reason="pyarrow dtype not available")
def test_to_time_infer_fmt(
    request: pytest.FixtureRequest,
    constructor: Constructor,
    data: dict[str, list[str]],
    expected: str,
) -> None:
    """Check `str.to_time` without a format through the lazy expression API.

    Marked xfail for polars older than 1.30 when the input has fewer than two
    ":" separators, for pandas-like constructors without pyarrow, and for
    pyspark/dask constructors.
    """
    constructor_id = str(constructor)
    old_polars_short_input = (
        "polars" in constructor_id
        and POLARS_VERSION < (1, 30)
        and data["a"][0].count(":") < 2
    )
    if (
        old_polars_short_input
        or is_pandaslike_without_pyarrow(constructor)
        or any(backend in constructor_id for backend in ("pyspark", "dask"))
    ):
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data)).lazy()
    result = frame.select(b=nw.col("a").str.to_time()).collect()

    assert str(result.item(row=0, column="b")) == expected
    assert isinstance(result.collect_schema()["b"], nw.Time)


@pytest.mark.parametrize(
    ("data", "expected"),
    [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")],
)
@pytest.mark.skipif(PANDAS_VERSION < (2, 2, 0), reason="pyarrow dtype not available")
def test_to_time_series_infer_fmt(
    request: pytest.FixtureRequest,
    constructor_eager: ConstructorEager,
    data: dict[str, list[str]],
    expected: str,
) -> None:
    """Check `Series.str.to_time` without a format on eager backends.

    Marked xfail for polars older than 1.30 when the input has fewer than two
    ":" separators, for pandas-like constructors without pyarrow, and for
    pyspark constructors.
    """
    constructor_id = str(constructor_eager)
    old_polars_short_input = (
        "polars" in constructor_id
        and POLARS_VERSION < (1, 30)
        and data["a"][0].count(":") < 2
    )
    if (
        old_polars_short_input
        or is_pandaslike_without_pyarrow(constructor_eager)
        or "pyspark" in constructor_id
    ):
        request.applymarker(pytest.mark.xfail)

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    result = series.str.to_time()

    assert str(result.item(0)) == expected
    assert isinstance(result.dtype, nw.Time)
Loading