diff --git a/docs/api-reference/expr_str.md b/docs/api-reference/expr_str.md index 9fc5e85265..c30394261f 100644 --- a/docs/api-reference/expr_str.md +++ b/docs/api-reference/expr_str.md @@ -20,6 +20,7 @@ - to_date - to_datetime - to_lowercase + - to_time - to_titlecase - to_uppercase - zfill diff --git a/docs/api-reference/series_str.md b/docs/api-reference/series_str.md index a5c4bab882..f5232bbc82 100644 --- a/docs/api-reference/series_str.md +++ b/docs/api-reference/series_str.md @@ -20,6 +20,7 @@ - to_date - to_datetime - to_lowercase + - to_time - to_titlecase - to_uppercase - zfill diff --git a/narwhals/_arrow/series_str.py b/narwhals/_arrow/series_str.py index 762be63d55..8c3c2c4959 100644 --- a/narwhals/_arrow/series_str.py +++ b/narwhals/_arrow/series_str.py @@ -11,6 +11,7 @@ extract_native, lit, parse_datetime_format, + parse_time_format, ) from narwhals._compliant.any_namespace import StringNamespace @@ -80,6 +81,13 @@ def to_datetime(self, format: str | None) -> ArrowSeries: def to_date(self, format: str | None) -> ArrowSeries: return self.to_datetime(format=format).dt.date() + def to_time(self, format: str | None) -> ArrowSeries: + format = parse_time_format(self.native) if format is None else format + timestamp_array = pc.strptime(self.native, format=format, unit="us") + + nw_time_dtype = self.version.dtypes.Time() + return self.with_native(timestamp_array).cast(nw_time_dtype) + def to_uppercase(self) -> ArrowSeries: return self.with_native(pc.utf8_upper(self.native)) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 5a678b5bf4..654e9bc41b 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -394,7 +394,7 @@ def parse_datetime_format(arr: ChunkedArrayAny) -> str: raise ValueError(msg) date_value = _parse_date_format(cast("pc.StringArray", matches.field("date"))) - time_value = _parse_time_format(cast("pc.StringArray", matches.field("time"))) + time_value = parse_time_format(cast("pc.StringArray", matches.field("time"))) sep_value = separators[0].as_py() tz_value = "%z" if tz[0].as_py() else "" @@ -422,7 +422,7 @@ def _parse_date_format(arr: pc.StringArray) -> str: raise ValueError(msg) -def _parse_time_format(arr: pc.StringArray) -> str: +def parse_time_format(arr: pc.StringArray) -> str: for time_rgx, time_fmt in TIME_FORMATS: matches = pc.extract_regex(arr, pattern=time_rgx) if pc.all(matches.is_valid()).as_py(): diff --git a/narwhals/_compliant/any_namespace.py b/narwhals/_compliant/any_namespace.py index 27354eb7ff..b8c16c6e6a 100644 --- a/narwhals/_compliant/any_namespace.py +++ b/narwhals/_compliant/any_namespace.py @@ -102,6 +102,7 @@ def contains(self, pattern: T, *, literal: bool) -> T: ... def slice(self, offset: int, length: int | None) -> T: ... def split(self, by: str) -> T: ... def to_datetime(self, format: str | None) -> T: ... + def to_time(self, format: str | None) -> T: ... def to_date(self, format: str | None) -> T: ... def to_lowercase(self) -> T: ... def to_titlecase(self) -> T: ... diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 1a138f35f3..5d41af13c3 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -1154,6 +1154,9 @@ def to_datetime(self, format: str | None) -> EagerExprT: def to_date(self, format: str | None) -> EagerExprT: return self.compliant._reuse_series_namespace("str", "to_date", format=format) + def to_time(self, format: str | None) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "to_time", format=format) + def to_lowercase(self) -> EagerExprT: return self.compliant._reuse_series_namespace("str", "to_lowercase") diff --git a/narwhals/_dask/expr_str.py b/narwhals/_dask/expr_str.py index 411d5a5fec..c5fb6442e8 100644 --- a/narwhals/_dask/expr_str.py +++ b/narwhals/_dask/expr_str.py @@ -73,6 +73,10 @@ def to_datetime(self, format: str | None) -> DaskExpr: lambda expr: dd.to_datetime(expr, format=format) ) + def to_time(self, format: str | None) -> DaskExpr: + msg = "dask backend does not support the Time type" + raise ValueError(msg) + def to_uppercase(self) -> DaskExpr: return self.compliant._with_callable(lambda expr: expr.str.upper()) diff --git a/narwhals/_duckdb/expr_str.py b/narwhals/_duckdb/expr_str.py index 4191cef69f..8c80e84453 100644 --- a/narwhals/_duckdb/expr_str.py +++ b/narwhals/_duckdb/expr_str.py @@ -29,6 +29,15 @@ def to_date(self, format: str | None) -> DuckDBExpr: compliant_expr = self.compliant return compliant_expr.cast(compliant_expr._version.dtypes.Date()) + def to_time(self, format: str | None) -> DuckDBExpr: + time_dtype = self.compliant._version.dtypes.Time() + if format is None: + return self.compliant.cast(time_dtype) + + return self.compliant._with_elementwise( + lambda expr: F("strptime", expr, lit(format)) + ).cast(time_dtype) + @requires.backend_version((1, 2)) def to_titlecase(self) -> DuckDBExpr: from narwhals._duckdb.utils import lambda_expr diff --git a/narwhals/_ibis/expr_str.py b/narwhals/_ibis/expr_str.py index c6564ba74c..2796bb3efe 100644 --- a/narwhals/_ibis/expr_str.py +++ b/narwhals/_ibis/expr_str.py @@ -77,6 +77,14 @@ def fn(expr: ir.StringColumn) -> ir.DateValue: return self.compliant._with_callable(fn) + def to_time(self, format: str | None) -> IbisExpr: + time_dtype = self.compliant._version.dtypes.Time() + if format is None: + return self.compliant.cast(time_dtype) + return self.compliant._with_callable(self._to_datetime_naive(format)).cast( + time_dtype + ) + def pad_start(self, length: int, fill_char: str) -> IbisExpr: def _pad_start(expr: ir.StringColumn) -> ir.Value: padded = expr.lpad(length, fill_char) diff --git a/narwhals/_pandas_like/series_str.py b/narwhals/_pandas_like/series_str.py index 068d74100d..a856b3b07d 100644 --- a/narwhals/_pandas_like/series_str.py +++ b/narwhals/_pandas_like/series_str.py @@ -92,6 +92,10 @@ def _to_datetime(self, format: str | None, *, utc: bool) -> Any: def to_date(self, format: str | None) -> PandasLikeSeries: return self.to_datetime(format=format).dt.date() + def to_time(self, format: str | None) -> PandasLikeSeries: + time_dtype = self.version.dtypes.Time() + return self.with_native(self._to_datetime(format, utc=False)).cast(time_dtype) + def to_uppercase(self) -> PandasLikeSeries: return self.with_native(self.native.str.upper()) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 1011f7ce93..75d21b68dd 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -341,6 +341,7 @@ def zfill(self, width: int) -> CompliantT: ... split: Method[CompliantT] to_date: Method[CompliantT] to_datetime: Method[CompliantT] + to_time: Method[CompliantT] to_lowercase: Method[CompliantT] to_uppercase: Method[CompliantT] pad_start: Method[CompliantT] diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index 7b17340d29..1336ac5744 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -35,6 +35,10 @@ def to_date(self, format: str | None) -> SparkLikeExpr: lambda expr: F.to_date(expr, format=strptime_to_pyspark_format(format)) ) + def to_time(self, format: str | None) -> SparkLikeExpr: + msg = "spark-like backends do not support the Time type" + raise ValueError(msg) + def to_titlecase(self) -> SparkLikeExpr: impl = self.compliant._implementation sqlframe_required_version = (3, 43, 1) diff --git a/narwhals/expr_str.py b/narwhals/expr_str.py index 2dcfccac83..c9fbab60f4 100644 --- a/narwhals/expr_str.py +++ b/narwhals/expr_str.py @@ -406,6 +406,41 @@ def to_date(self, format: str | None = None) -> ExprT: ExprNode(ExprKind.ELEMENTWISE, "str.to_date", format=format) ) + def to_time(self, format: str | None = None) -> ExprT: + """Convert to [`narwhals.dtypes.Time`][] dtype. + + Warning: + As different backends auto-infer format in different ways, if `format=None` + there is no guarantee that the result will be equal. + + Arguments: + format: Format to use for conversion. If set to None (default), the format is + inferred from the data. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": ["12:59:21", "18:42:12"]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a").str.to_time(format="%H:%M:%S")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (2, 1) | + | ┌──────────┐ | + | │ a │ | + | │ --- │ | + | │ time │ | + | ╞══════════╡ | + | │ 12:59:21 │ | + | │ 18:42:12 │ | + | └──────────┘ | + └──────────────────┘ + """ + return self._expr._append_node( + ExprNode(ExprKind.ELEMENTWISE, "str.to_time", format=format) + ) + def to_uppercase(self) -> ExprT: r"""Transform string to uppercase variant. diff --git a/narwhals/series_str.py b/narwhals/series_str.py index bd26fe682a..47f596f635 100644 --- a/narwhals/series_str.py +++ b/narwhals/series_str.py @@ -399,6 +399,36 @@ def to_date(self, format: str | None = None) -> SeriesT: self._narwhals_series._compliant_series.str.to_date(format=format) ) + def to_time(self, format: str | None = None) -> SeriesT: + """Convert to [`narwhals.dtypes.Time`][] dtype. + + Warning: + As different backends auto-infer format in different ways, if `format=None` + there is no guarantee that the result will be equal. + + Arguments: + format: Format to use for conversion. If set to None (default), the format is + inferred from the data. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series(["12:59:21", "18:42:12"]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.str.to_time( + ... format="%H:%M:%S" + ... ).to_native() # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [time] + [ + 12:59:21 + 18:42:12 + ] + """ + return self._narwhals_series._with_compliant( + self._narwhals_series._compliant_series.str.to_time(format=format) + ) + def to_titlecase(self) -> SeriesT: """Modify strings to their titlecase equivalent. diff --git a/tests/expr_and_series/str/to_time_test.py b/tests/expr_and_series/str/to_time_test.py new file mode 100644 index 0000000000..fceed2688d --- /dev/null +++ b/tests/expr_and_series/str/to_time_test.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw +from tests.utils import PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION + +if TYPE_CHECKING: + from tests.utils import Constructor, ConstructorEager + +data = {"a": ["12:34:56"]} + + +def requires_time_support( + request: pytest.FixtureRequest, constructor: Constructor | ConstructorEager +) -> None: + """Enforce Time dtype test expectations for dataframe backends. + + Skip or mark tests as expected failures depending on backend capabilities, + version, and pyarrow availability when testing Time dtype support. + """ + if constructor.__name__.startswith(("pandas", "modin")): + if PANDAS_VERSION < (2, 2, 0): + pytest.skip( + "pandas < 2.2.0 has no pyarrow dtype support (and therefore does not support the Time dtype)" + ) + + if PYARROW_VERSION == (0, 0, 0): + pytest.skip("pandas requires pyarrow for the Time dtype") + + if "pyspark" in str(constructor) or "dask" in str(constructor): + request.applymarker( + pytest.mark.xfail(reason="backend does not support Time dtype") + ) + + +def test_to_time(request: pytest.FixtureRequest, constructor: Constructor) -> None: + requires_time_support(request, constructor) + + expected = "12:34:56" + + result = ( + nw.from_native(constructor(data)) + .lazy() + .select(b=nw.col("a").str.to_time(format="%H:%M:%S")) + .collect() + ) + assert isinstance(result.collect_schema()["b"], nw.Time) + assert str(result.item(row=0, column="b")) == expected + + +def test_to_time_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + requires_time_support(request, constructor_eager) + + expected = "12:34:56.000000000" if "cudf" in str(constructor_eager) else "12:34:56" + result = nw.from_native(constructor_eager(data), eager_only=True)["a"].str.to_time( + format="%H:%M:%S" + ) + + assert isinstance(result.dtype, nw.Time) + assert str(result.item(0)) == expected + + +@pytest.mark.parametrize( + ("data", "expected"), + [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")], +) +def test_to_time_infer_fmt( + request: pytest.FixtureRequest, + constructor: Constructor, + data: dict[str, list[str]], + expected: str, +) -> None: + requires_time_support(request, constructor) + + if ( + "polars" in str(constructor) + and POLARS_VERSION < (1, 30) + and data["a"][0].count(":") < 2 + ): # pragma: no cover + request.applymarker( + pytest.mark.xfail(reason="Polars<1.30 cannot auto-infer the HH:MM format") + ) + + result = ( + nw.from_native(constructor(data)) + .lazy() + .select(b=nw.col("a").str.to_time()) + .collect() + ) + assert str(result.item(row=0, column="b")) == expected + assert isinstance(result.collect_schema()["b"], nw.Time) + + +@pytest.mark.parametrize( + ("data", "expected"), + [({"a": ["12:34:56"]}, "12:34:56"), ({"a": ["12:34"]}, "12:34:00")], +) +def test_to_time_series_infer_fmt( + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, + data: dict[str, list[str]], + expected: str, +) -> None: + requires_time_support(request, constructor_eager) + + if ( + "polars" in str(constructor_eager) + and POLARS_VERSION < (1, 30) + and data["a"][0].count(":") < 2 + ): # pragma: no cover + request.applymarker( + pytest.mark.xfail(reason="Polars<1.30 cannot auto-infer the HH:MM format") + ) + + result = nw.from_native(constructor_eager(data), eager_only=True)["a"].str.to_time() + assert str(result.item(0)) == expected + assert isinstance(result.dtype, nw.Time)