NVIDIA · pzharrington · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 14, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -31,7 +31,7 @@ repos:
       language: python
       types: [python]
       additional_dependencies: ['interrogate==1.7.0']
-      exclude: ^docs/|^physicsnemo/experimental/|^test/
+      exclude: ^docs/|^physicsnemo/experimental/|^test/|^examples/.*/test/
 
 -   repo: https://github.com/igorshubovych/markdownlint-cli
     rev: v0.35.0

diff --git a/examples/weather/healda/README.md b/examples/weather/healda/README.md
@@ -0,0 +1,131 @@
+# HealDA — AI-based Data Assimilation on the HEALPix Grid
+
+> **This recipe is under active construction.**
+> Structure and functionality are subject to change.
+
+HealDA is a stateless assimilation model that produces a single
+global weather analysis from conventional and satellite
+observations. It operates on a HEALPix level-6 padded XY grid
+and outputs ERA5-compatible atmospheric variables.
+
+This example provides a recipe to train HealDA, with support
+for extension to custom data.
+
+## Setup
+
+Start by installing PhysicsNeMo (if not already installed) with
+the `healda` optional dependency group, along with the packages
+in `requirements.txt`. Then, copy this folder
+(`examples/weather/healda`) to a system with a GPU available.
+Also, prepare a dataset that can serve training data according
+to the protocols outlined in the
+[Generalized Data Loading](#generalized-data-loading) section
+below.
+
+## Generalized Data Loading
+
+The `physicsnemo.experimental.datapipes.healda` package provides
+a composable data loading pipeline with clear extension points.
+The architecture separates components into loaders, transforms,
+datasets, and sampling infrastructure.
+
+### Architecture
+
+```text
+ObsERA5Dataset(era5_data, obs_loader, transform)
+  |  Temporal windowing via FrameIndexGenerator
+  |  __getitems__ -> get() per index -> transform.transform()
+  v
+ChunkedDistributedSampler (contiguous chunks for cache locality)
+  |
+DataLoader (1 worker each, pin_memory, persistent_workers)
+  |
+RoundRobinLoader (interleaves per-worker DataLoaders)
+  |
+prefetch_map(loader, transform.device_transform)
+  |
+Training loop (GPU-ready batch)
+```
+
+### Key Protocols
+
+Custom data sources and transforms plug in via these protocols
+(see `physicsnemo.experimental.datapipes.healda.protocols`):
+
+**`ObsLoader`** — the observation loading interface:
+
+```python
+class MyObsLoader:
+    async def sel_time(self, times):
+        """Return {"obs": [pa.Table, ...]}"""
+        ...
+```
+
+**`Transform`** / **`DeviceTransform`** — two-stage batch
+processing:
+
+```python
+class MyTransform:
+    def transform(self, times, frames):
+        """CPU-side: normalize, encode obs, time features."""
+        ...
+
+    def device_transform(self, batch, device):
+        """GPU-side: move to device, compute obs features."""
+        ...
+```
+
+### Provided Implementations
+
+| Component | Module | Description |
+|---|---|---|
+| `ObsERA5Dataset` | `dataset` | ERA5 state + observations |
+| `UFSUnifiedLoader` | `loaders.ufs_obs` | Parquet obs loader |
+| `ERA5Loader` | `loaders.era5` | Async ERA5 zarr loader |
+| `ERA5ObsTransform` | `transforms.era5_obs` | Two-stage transform |
+| `ChunkedDistributedSampler` | `samplers` | Distributed sampler |
+| `RoundRobinLoader` | `samplers` | Multi-loader interleave |
+| `prefetch_map` | `prefetch` | CUDA stream prefetching |
+
+All modules above are under
+`physicsnemo.experimental.datapipes.healda`.
+
+### Writing a Custom Observation Loader
+
+Implement `async def sel_time(self, times)` returning a dict
+whose `"obs"` entry holds one table per requested timestamp:
+
+```python
+class GOESRadianceLoader:
+    """Example observation loader exposing ``sel_time``."""
+
+    def __init__(self, data_path, channels):
+        self.data_path = data_path
+        self.channels = channels
+
+    async def sel_time(self, times):
+        """Return {"obs": [...]}, one entry per element of times."""
+        tables = []
+        for t in times:
+            # _load_goes_radiances is not defined in this snippet;
+            # supply your own reader for a single timestamp.
+            table = self._load_goes_radiances(t)
+            tables.append(table)
+        return {"obs": tables}
+```
+
+Then pass it to the dataset:
+
+```python
+from physicsnemo.experimental.datapipes.healda import (
+    ObsERA5Dataset,
+)
+from physicsnemo.experimental.datapipes.healda.transforms.era5_obs import (
+    ERA5ObsTransform,
+)
+from physicsnemo.experimental.datapipes.healda.configs.variable_configs import (
+    VARIABLE_CONFIGS,
+)
+
+dataset = ObsERA5Dataset(
+    era5_data=era5_xr["data"],
+    obs_loader=GOESRadianceLoader(...),
+    transform=ERA5ObsTransform(...),
+    variable_config=VARIABLE_CONFIGS["era5"],
+)
+```
diff --git a/examples/weather/healda/requirements.txt b/examples/weather/healda/requirements.txt
@@ -0,0 +1,9 @@
+# nvidia-physicsnemo[datapipes-extras] 
+cftime
+pyarrow
+python-dotenv
+earth2grid @ git+https://github.com/NVlabs/earth2grid.git@main
+healpy
+matplotlib
+joblib
+icechunk
diff --git a/examples/weather/healda/test/conftest.py b/examples/weather/healda/test/conftest.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2026 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+
+
+@pytest.fixture
+def device():
+    """Name the torch device for tests: "cuda" when available, else "cpu"."""
+    if torch.cuda.is_available():
+        return "cuda"
+    return "cpu"
diff --git a/examples/weather/healda/test/test_combined_schema.py b/examples/weather/healda/test/test_combined_schema.py
@@ -0,0 +1,73 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2026 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the combined observation schema and sensor config consistency."""
+
+import pyarrow as pa
+
+from physicsnemo.experimental.datapipes.healda.configs.combined_schema import (
+    get_channel_table_schema,
+    get_combined_observation_schema,
+)
+from physicsnemo.experimental.datapipes.healda.configs.sensors import (
+    SENSOR_CONFIGS,
+    SENSOR_NAME_TO_ID,
+)
+
+
+def test_combined_schema_has_required_fields():
+    """The combined observation schema contains every required field."""
+    field_names = get_combined_observation_schema().names
+    required = (
+        "Latitude",
+        "Longitude",
+        "Absolute_Obs_Time",
+        "DA_window",
+        "Platform_ID",
+        "Observation",
+        "Global_Channel_ID",
+    )
+    for name in required:
+        assert name in field_names, f"Missing required field: {name}"
+
+
+def test_combined_schema_satellite_fields():
+    """The combined schema contains the satellite-specific fields."""
+    schema = get_combined_observation_schema()
+    for name in ["Sat_Zenith_Angle", "Sol_Zenith_Angle", "Scan_Angle"]:
+        # Name the offending field so a failure is actionable.
+        assert name in schema.names, f"Missing satellite field: {name}"
+
+
+def test_combined_schema_conventional_fields():
+    """The combined schema contains the conventional-observation fields."""
+    schema = get_combined_observation_schema()
+    for name in ["Pressure", "Height", "Observation_Type"]:
+        # Name the offending field so a failure is actionable.
+        assert name in schema.names, f"Missing conventional field: {name}"
+
+
+def test_channel_table_schema():
+    """The channel table schema contains the id and statistics fields."""
+    schema = get_channel_table_schema()
+    for name in ["Global_Channel_ID", "sensor_id", "mean", "stddev"]:
+        # One loop instead of four bare asserts; the message names the gap.
+        assert name in schema.names, f"Missing channel table field: {name}"
+
+
+def test_sensor_configs_consistent():
+    """All sensors in SENSOR_CONFIGS have a matching SENSOR_NAME_TO_ID entry."""
+    for name in SENSOR_CONFIGS:
+        # Report which sensor lacks an ID rather than a bare AssertionError.
+        assert name in SENSOR_NAME_TO_ID, (
+            f"Sensor {name} is missing from SENSOR_NAME_TO_ID"
+        )
+
+
+def test_sensor_channels_positive():
+    """Each entry in SENSOR_CONFIGS declares a channel count above zero."""
+    for name, config in SENSOR_CONFIGS.items():
+        n_channels = config.channels
+        assert n_channels > 0, f"Sensor {name} has non-positive channel count"
diff --git a/examples/weather/healda/test/test_features.py b/examples/weather/healda/test/test_features.py
@@ -0,0 +1,103 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2026 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for observation metadata featurization (standard and extended).
+
+The Triton kernel tests require CUDA and validate that the Triton
+implementation matches the reference Python implementation.
+"""
+
+import pytest
+import torch
+
+from physicsnemo.experimental.datapipes.healda.transforms import (
+    obs_features as standard,
+)
+from physicsnemo.experimental.datapipes.healda.transforms import (
+    obs_features_ext as extended,
+)
+
+
+def _make_obs_data(n, device, include_lat=False):
+    """Build a deterministic dict of synthetic observation tensors.
+
+    All random draws come from one generator seeded with 42 on ``device``,
+    so the produced values depend on the order of the ``torch.rand`` calls
+    below; do not reorder them.
+
+    Args:
+        n: Number of observations; every tensor is created with ``n`` rows.
+        device: Device on which the generator and all tensors are created.
+        include_lat: When true, also add a ``"lat"`` entry.
+
+    Returns:
+        Dict with keys ``target_time_sec``, ``time``, ``lon``, ``height``,
+        ``pressure``, ``scan_angle``, ``sat_zenith_angle`` and
+        ``sol_zenith_angle`` (plus ``lat`` when requested). The keys are
+        splatted as keyword arguments by the tests in this module.
+    """
+    g = torch.Generator(device=device)
+    g.manual_seed(42)
+
+    # Scaled uniform draws: height in [0, 50000), pressure in [0, 1100),
+    # scan angle in [-50, 50), satellite zenith in [-60, 60),
+    # solar zenith in [10, 170).
+    height = torch.rand(n, device=device, generator=g) * 50000
+    pressure = torch.rand(n, device=device, generator=g) * 1100
+    scan_angle = torch.rand(n, device=device, generator=g) * 100 - 50
+    sat_zenith_angle = torch.rand(n, device=device, generator=g) * 120 - 60
+    sol_zenith_angle = torch.rand(n, device=device, generator=g) * 160 + 10
+
+    # Conv/sat split: NaN height -> satellite, valid height -> conventional
+    # Each row is marked satellite with probability 0.4; satellite rows lose
+    # height/pressure, all other rows lose the three angle fields.
+    is_sat = torch.rand(n, device=device, generator=g) < 0.4
+    height[is_sat] = float("nan")
+    pressure[is_sat] = float("nan")
+    scan_angle[~is_sat] = float("nan")
+    sat_zenith_angle[~is_sat] = float("nan")
+    sol_zenith_angle[~is_sat] = float("nan")
+
+    # NOTE(review): ``time`` looks like a nanosecond timestamp 100 s after
+    # ``target_time_sec`` -- confirm the units against the featurizers.
+    data = dict(
+        target_time_sec=torch.full(
+            (n,), 1_700_000_000, dtype=torch.int64, device=device
+        ),
+        time=torch.full(
+            (n,), 1_700_000_100_000_000_000, dtype=torch.int64, device=device
+        ),
+        lon=torch.rand(n, device=device, generator=g) * 360 - 180,
+        height=height,
+        pressure=pressure,
+        scan_angle=scan_angle,
+        sat_zenith_angle=sat_zenith_angle,
+        sol_zenith_angle=sol_zenith_angle,
+    )
+    if include_lat:
+        # Drawn last, so enabling it does not disturb the values above.
+        data["lat"] = torch.rand(n, device=device, generator=g) * 180 - 90
+    return data
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available(), reason="CUDA required for Triton kernel"
+)
+@pytest.mark.parametrize("n", [0, 1, 137, 10_000])
+def test_standard_triton_matches_reference(n):
+    """Standard featurization: kernel output agrees with the reference."""
+    cuda = torch.device("cuda")
+    # Always build at least one row, then slice down to empty for n == 0.
+    inputs = _make_obs_data(max(n, 1), cuda)
+    if n == 0:
+        inputs = {key: tensor[:0] for key, tensor in inputs.items()}
+
+    expected = standard._compute_unified_metadata_reference(**inputs)
+    actual = standard.compute_unified_metadata(**inputs)
+
+    assert expected.shape == actual.shape == (n, standard.N_FEATURES)
+    if n > 0:
+        torch.testing.assert_close(expected, actual, atol=1e-5, rtol=1e-5)
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available(), reason="CUDA required for Triton kernel"
+)
+@pytest.mark.parametrize("n", [0, 1, 137, 10_000])
+def test_extended_triton_matches_reference(n):
+    """Extended featurization: kernel output agrees with the reference."""
+    cuda = torch.device("cuda")
+    # Always build at least one row, then slice down to empty for n == 0.
+    inputs = _make_obs_data(max(n, 1), cuda, include_lat=True)
+    if n == 0:
+        inputs = {key: tensor[:0] for key, tensor in inputs.items()}
+
+    expected = extended._compute_unified_metadata_reference(**inputs)
+    actual = extended.compute_unified_metadata(**inputs)
+
+    assert expected.shape == actual.shape == (n, extended.N_FEATURES)
+    if n > 0:
+        torch.testing.assert_close(expected, actual, atol=1e-5, rtol=1e-5)